Diffstat (limited to 'C/ZstdDec.c')
-rw-r--r-- | C/ZstdDec.c | 4064 |
1 file changed, 4064 insertions, 0 deletions
diff --git a/C/ZstdDec.c b/C/ZstdDec.c
new file mode 100644
index 0000000..ecf6d22
--- /dev/null
+++ b/C/ZstdDec.c
@@ -0,0 +1,4064 @@
1 | /* ZstdDec.c -- Zstd Decoder | ||
2 | 2024-01-21 : the code was developed by Igor Pavlov, using Zstandard format | ||
3 | specification and original zstd decoder code as reference code. | ||
4 | original zstd decoder code: Copyright (c) Facebook, Inc. All rights reserved. | ||
5 | This source code is licensed under BSD 3-Clause License. | ||
6 | */ | ||
7 | |||
8 | #include "Precomp.h" | ||
9 | |||
10 | #include <string.h> | ||
11 | #include <stdlib.h> | ||
12 | // #include <stdio.h> | ||
13 | |||
14 | #include "Alloc.h" | ||
15 | #include "Xxh64.h" | ||
16 | #include "ZstdDec.h" | ||
17 | #include "CpuArch.h" | ||
18 | |||
19 | #if defined(MY_CPU_ARM64) | ||
20 | #include <arm_neon.h> | ||
21 | #endif | ||
22 | |||
23 | /* original-zstd still doesn't support a window larger than 2 GiB. | ||
24 | So we also limit our decoder to a 2 GiB window: */ | ||
25 | #if defined(MY_CPU_64BIT) && 0 == 1 | ||
26 | #define MAX_WINDOW_SIZE_LOG 41 | ||
27 | #else | ||
28 | #define MAX_WINDOW_SIZE_LOG 31 | ||
29 | #endif | ||
30 | |||
31 | typedef | ||
32 | #if MAX_WINDOW_SIZE_LOG < 32 | ||
33 | UInt32 | ||
34 | #else | ||
35 | size_t | ||
36 | #endif | ||
37 | CZstdDecOffset; | ||
38 | |||
39 | // for debug: simpler and smaller code but slow: | ||
40 | // #define Z7_ZSTD_DEC_USE_HUF_STREAM1_ALWAYS | ||
41 | |||
42 | // #define SHOW_STAT | ||
43 | #ifdef SHOW_STAT | ||
44 | #include <stdio.h> | ||
45 | static unsigned g_Num_Blocks_Compressed = 0; | ||
46 | static unsigned g_Num_Blocks_memcpy = 0; | ||
47 | static unsigned g_Num_Wrap_memmove_Num = 0; | ||
48 | static unsigned g_Num_Wrap_memmove_Bytes = 0; | ||
49 | static unsigned g_NumSeqs_total = 0; | ||
50 | // static unsigned g_NumCopy = 0; | ||
51 | static unsigned g_NumOver = 0; | ||
52 | static unsigned g_NumOver2 = 0; | ||
53 | static unsigned g_Num_Match = 0; | ||
54 | static unsigned g_Num_Lits = 0; | ||
55 | static unsigned g_Num_LitsBig = 0; | ||
56 | static unsigned g_Num_Lit0 = 0; | ||
57 | static unsigned g_Num_Rep0 = 0; | ||
58 | static unsigned g_Num_Rep1 = 0; | ||
59 | static unsigned g_Num_Rep2 = 0; | ||
60 | static unsigned g_Num_Rep3 = 0; | ||
61 | static unsigned g_Num_Threshold_0 = 0; | ||
62 | static unsigned g_Num_Threshold_1 = 0; | ||
63 | static unsigned g_Num_Threshold_0sum = 0; | ||
64 | static unsigned g_Num_Threshold_1sum = 0; | ||
65 | #define STAT_UPDATE(v) v | ||
66 | #else | ||
67 | #define STAT_UPDATE(v) | ||
68 | #endif | ||
69 | #define STAT_INC(v) STAT_UPDATE(v++;) | ||
70 | |||
71 | |||
72 | typedef struct | ||
73 | { | ||
74 | const Byte *ptr; | ||
75 | size_t len; | ||
76 | } | ||
77 | CInBufPair; | ||
78 | |||
79 | |||
80 | #if defined(MY_CPU_ARM_OR_ARM64) || defined(MY_CPU_X86_OR_AMD64) | ||
81 | #if (defined(__clang__) && (__clang_major__ >= 6)) \ | ||
82 | || (defined(__GNUC__) && (__GNUC__ >= 6)) | ||
83 | // disable for debug: | ||
84 | #define Z7_ZSTD_DEC_USE_BSR | ||
85 | #elif defined(_MSC_VER) && (_MSC_VER >= 1300) | ||
86 | // #if defined(MY_CPU_ARM_OR_ARM64) | ||
87 | #if (_MSC_VER >= 1600) | ||
88 | #include <intrin.h> | ||
89 | #endif | ||
90 | // disable for debug: | ||
91 | #define Z7_ZSTD_DEC_USE_BSR | ||
92 | #endif | ||
93 | #endif | ||
94 | |||
95 | #ifdef Z7_ZSTD_DEC_USE_BSR | ||
96 | #if defined(__clang__) || defined(__GNUC__) | ||
97 | #define MY_clz(x) ((unsigned)__builtin_clz((UInt32)x)) | ||
98 | #else // #if defined(_MSC_VER) | ||
99 | #ifdef MY_CPU_ARM_OR_ARM64 | ||
100 | #define MY_clz _CountLeadingZeros | ||
101 | #endif // MY_CPU_ARM_OR_ARM64 | ||
102 | #endif // _MSC_VER | ||
103 | #elif !defined(Z7_ZSTD_DEC_USE_LOG_TABLE) | ||
104 | #define Z7_ZSTD_DEC_USE_LOG_TABLE | ||
105 | #endif | ||
106 | |||
107 | |||
108 | static | ||
109 | Z7_FORCE_INLINE | ||
110 | unsigned GetHighestSetBit_32_nonzero_big(UInt32 num) | ||
111 | { | ||
112 | // (num != 0) | ||
113 | #ifdef MY_clz | ||
114 | return 31 - MY_clz(num); | ||
115 | #elif defined(Z7_ZSTD_DEC_USE_BSR) | ||
116 | { | ||
117 | unsigned long zz; | ||
118 | _BitScanReverse(&zz, num); | ||
119 | return zz; | ||
120 | } | ||
121 | #else | ||
122 | { | ||
123 | int i = -1; | ||
124 | for (;;) | ||
125 | { | ||
126 | i++; | ||
127 | num >>= 1; | ||
128 | if (num == 0) | ||
129 | return (unsigned)i; | ||
130 | } | ||
131 | } | ||
132 | #endif | ||
133 | } | ||
134 | |||
135 | #ifdef Z7_ZSTD_DEC_USE_LOG_TABLE | ||
136 | |||
137 | #define R1(a) a, a | ||
138 | #define R2(a) R1(a), R1(a) | ||
139 | #define R3(a) R2(a), R2(a) | ||
140 | #define R4(a) R3(a), R3(a) | ||
141 | #define R5(a) R4(a), R4(a) | ||
142 | #define R6(a) R5(a), R5(a) | ||
143 | #define R7(a) R6(a), R6(a) | ||
144 | #define R8(a) R7(a), R7(a) | ||
145 | #define R9(a) R8(a), R8(a) | ||
146 | |||
147 | #define Z7_ZSTD_FSE_MAX_ACCURACY 9 | ||
148 | // states[] values in FSE_Generate() can use (Z7_ZSTD_FSE_MAX_ACCURACY + 1) bits. | ||
149 | static const Byte k_zstd_LogTable[2 << Z7_ZSTD_FSE_MAX_ACCURACY] = | ||
150 | { | ||
151 | R1(0), R1(1), R2(2), R3(3), R4(4), R5(5), R6(6), R7(7), R8(8), R9(9) | ||
152 | }; | ||
153 | |||
154 | #define GetHighestSetBit_32_nonzero_small(num) (k_zstd_LogTable[num]) | ||
155 | #else | ||
156 | #define GetHighestSetBit_32_nonzero_small GetHighestSetBit_32_nonzero_big | ||
157 | #endif | ||
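/* Reading note (illustrative, derived from the R1..R9 expansion above):
   k_zstd_LogTable[n] holds floor(log2(n)) for 1 <= n < (2 << Z7_ZSTD_FSE_MAX_ACCURACY).
   For example:
     k_zstd_LogTable[1]   == 0
     k_zstd_LogTable[7]   == 2
     k_zstd_LogTable[8]   == 3
     k_zstd_LogTable[512] == 9
   so GetHighestSetBit_32_nonzero_small(n) matches the clz/BSR path for these inputs. */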
158 | |||
159 | |||
160 | #ifdef MY_clz | ||
161 | #define UPDATE_BIT_OFFSET_FOR_PADDING(b, bitOffset) \ | ||
162 | bitOffset -= (CBitCtr)(MY_clz(b) - 23); | ||
163 | #elif defined(Z7_ZSTD_DEC_USE_BSR) | ||
164 | #define UPDATE_BIT_OFFSET_FOR_PADDING(b, bitOffset) \ | ||
165 | { unsigned long zz; _BitScanReverse(&zz, b); bitOffset -= 8; bitOffset += zz; } | ||
166 | #else | ||
167 | #define UPDATE_BIT_OFFSET_FOR_PADDING(b, bitOffset) \ | ||
168 | for (;;) { bitOffset--; if (b & 0x80) { break; } b <<= 1; } | ||
169 | #endif | ||
170 | |||
171 | #define SET_bitOffset_TO_PAD(bitOffset, src, srcLen) \ | ||
172 | { \ | ||
173 | unsigned lastByte = (src)[(size_t)(srcLen) - 1]; \ | ||
174 | if (lastByte == 0) return SZ_ERROR_DATA; \ | ||
175 | bitOffset = (CBitCtr)((srcLen) * 8); \ | ||
176 | UPDATE_BIT_OFFSET_FOR_PADDING(lastByte, bitOffset) \ | ||
177 | } | ||
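/* Worked example (illustrative): zstd bit streams are read backwards, and the last
   byte ends with zero padding followed by a single '1' sentinel bit.
   If srcLen == 4 and the last byte is 0x35 (binary 00110101), the two high zero bits
   are padding and bit 5 is the sentinel, so SET_bitOffset_TO_PAD yields
     bitOffset = 4 * 8 - 3 = 29
   i.e. 29 data bits remain below the sentinel. A last byte of 0x00 has no sentinel
   and is rejected with SZ_ERROR_DATA. */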
178 | |||
179 | #ifndef Z7_ZSTD_DEC_USE_HUF_STREAM1_ALWAYS | ||
180 | |||
181 | #define SET_bitOffset_TO_PAD_and_SET_BIT_SIZE(bitOffset, src, srcLen_res) \ | ||
182 | { \ | ||
183 | unsigned lastByte = (src)[(size_t)(srcLen_res) - 1]; \ | ||
184 | if (lastByte == 0) return SZ_ERROR_DATA; \ | ||
185 | srcLen_res *= 8; \ | ||
186 | bitOffset = (CBitCtr)srcLen_res; \ | ||
187 | UPDATE_BIT_OFFSET_FOR_PADDING(lastByte, bitOffset) \ | ||
188 | } | ||
189 | |||
190 | #endif | ||
191 | |||
192 | /* | ||
193 | typedef Int32 CBitCtr_signed; | ||
194 | typedef Int32 CBitCtr; | ||
195 | */ | ||
196 | // /* | ||
197 | typedef ptrdiff_t CBitCtr_signed; | ||
198 | typedef ptrdiff_t CBitCtr; | ||
199 | // */ | ||
200 | |||
201 | |||
202 | #define MATCH_LEN_MIN 3 | ||
203 | #define kBlockSizeMax (1u << 17) | ||
204 | |||
205 | // #define Z7_ZSTD_DEC_PRINT_TABLE | ||
206 | |||
207 | #ifdef Z7_ZSTD_DEC_PRINT_TABLE | ||
208 | #define NUM_OFFSET_SYMBOLS_PREDEF 29 | ||
209 | #endif | ||
210 | #define NUM_OFFSET_SYMBOLS_MAX (MAX_WINDOW_SIZE_LOG + 1) // 32 | ||
211 | #define NUM_LL_SYMBOLS 36 | ||
212 | #define NUM_ML_SYMBOLS 53 | ||
213 | #define FSE_NUM_SYMBOLS_MAX 53 // NUM_ML_SYMBOLS | ||
214 | |||
215 | // /* | ||
216 | #if !defined(MY_CPU_X86) || defined(__PIC__) || defined(MY_CPU_64BIT) | ||
217 | #define Z7_ZSTD_DEC_USE_BASES_IN_OBJECT | ||
218 | #endif | ||
219 | // */ | ||
220 | // for debug: | ||
221 | // #define Z7_ZSTD_DEC_USE_BASES_LOCAL | ||
222 | // #define Z7_ZSTD_DEC_USE_BASES_IN_OBJECT | ||
223 | |||
224 | #define GLOBAL_TABLE(n) k_ ## n | ||
225 | |||
226 | #if defined(Z7_ZSTD_DEC_USE_BASES_LOCAL) | ||
227 | #define BASES_TABLE(n) a_ ## n | ||
228 | #elif defined(Z7_ZSTD_DEC_USE_BASES_IN_OBJECT) | ||
229 | #define BASES_TABLE(n) p->m_ ## n | ||
230 | #else | ||
231 | #define BASES_TABLE(n) GLOBAL_TABLE(n) | ||
232 | #endif | ||
233 | |||
234 | #define Z7_ZSTD_DEC_USE_ML_PLUS3 | ||
235 | |||
236 | #if defined(Z7_ZSTD_DEC_USE_BASES_LOCAL) || \ | ||
237 | defined(Z7_ZSTD_DEC_USE_BASES_IN_OBJECT) | ||
238 | |||
239 | #define SEQ_EXTRA_TABLES(n) \ | ||
240 | Byte n ## SEQ_LL_EXTRA [NUM_LL_SYMBOLS]; \ | ||
241 | Byte n ## SEQ_ML_EXTRA [NUM_ML_SYMBOLS]; \ | ||
242 | UInt32 n ## SEQ_LL_BASES [NUM_LL_SYMBOLS]; \ | ||
243 | UInt32 n ## SEQ_ML_BASES [NUM_ML_SYMBOLS]; \ | ||
244 | |||
245 | #define Z7_ZSTD_DEC_USE_BASES_CALC | ||
246 | |||
247 | #ifdef Z7_ZSTD_DEC_USE_BASES_CALC | ||
248 | |||
249 | #define FILL_LOC_BASES(n, startSum) \ | ||
250 | { unsigned i; UInt32 sum = startSum; \ | ||
251 | for (i = 0; i != Z7_ARRAY_SIZE(GLOBAL_TABLE(n ## _EXTRA)); i++) \ | ||
252 | { const unsigned a = GLOBAL_TABLE(n ## _EXTRA)[i]; \ | ||
253 | BASES_TABLE(n ## _BASES)[i] = sum; \ | ||
254 | /* if (sum != GLOBAL_TABLE(n ## _BASES)[i]) exit(1); */ \ | ||
255 | sum += (UInt32)1 << a; \ | ||
256 | BASES_TABLE(n ## _EXTRA)[i] = (Byte)a; }} | ||
257 | |||
258 | #define FILL_LOC_BASES_ALL \ | ||
259 | FILL_LOC_BASES (SEQ_LL, 0) \ | ||
260 | FILL_LOC_BASES (SEQ_ML, MATCH_LEN_MIN) \ | ||
261 | |||
262 | #else | ||
263 | #define COPY_GLOBAL_ARR(n) \ | ||
264 | memcpy(BASES_TABLE(n), GLOBAL_TABLE(n), sizeof(GLOBAL_TABLE(n))); | ||
265 | #define FILL_LOC_BASES_ALL \ | ||
266 | COPY_GLOBAL_ARR (SEQ_LL_EXTRA) \ | ||
267 | COPY_GLOBAL_ARR (SEQ_ML_EXTRA) \ | ||
268 | COPY_GLOBAL_ARR (SEQ_LL_BASES) \ | ||
269 | COPY_GLOBAL_ARR (SEQ_ML_BASES) \ | ||
270 | |||
271 | #endif | ||
272 | |||
273 | #endif | ||
274 | |||
275 | |||
276 | |||
277 | /// The sequence decoding baseline and number of additional bits to read/add | ||
278 | #if !defined(Z7_ZSTD_DEC_USE_BASES_CALC) | ||
279 | static const UInt32 GLOBAL_TABLE(SEQ_LL_BASES) [NUM_LL_SYMBOLS] = | ||
280 | { | ||
281 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | ||
282 | 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, | ||
283 | 0x2000, 0x4000, 0x8000, 0x10000 | ||
284 | }; | ||
285 | #endif | ||
286 | |||
287 | static const Byte GLOBAL_TABLE(SEQ_LL_EXTRA) [NUM_LL_SYMBOLS] = | ||
288 | { | ||
289 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
290 | 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, | ||
291 | 13, 14, 15, 16 | ||
292 | }; | ||
293 | |||
294 | #if !defined(Z7_ZSTD_DEC_USE_BASES_CALC) | ||
295 | static const UInt32 GLOBAL_TABLE(SEQ_ML_BASES) [NUM_ML_SYMBOLS] = | ||
296 | { | ||
297 | 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, | ||
298 | 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, | ||
299 | 35, 37, 39, 41, 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, | ||
300 | 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 | ||
301 | }; | ||
302 | #endif | ||
303 | |||
304 | static const Byte GLOBAL_TABLE(SEQ_ML_EXTRA) [NUM_ML_SYMBOLS] = | ||
305 | { | ||
306 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
307 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
308 | 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, | ||
309 | 12, 13, 14, 15, 16 | ||
310 | }; | ||
311 | |||
312 | |||
313 | #ifdef Z7_ZSTD_DEC_PRINT_TABLE | ||
314 | |||
315 | static const Int16 SEQ_LL_PREDEF_DIST [NUM_LL_SYMBOLS] = | ||
316 | { | ||
317 | 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, | ||
318 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, | ||
319 | -1,-1,-1,-1 | ||
320 | }; | ||
321 | static const Int16 SEQ_OFFSET_PREDEF_DIST [NUM_OFFSET_SYMBOLS_PREDEF] = | ||
322 | { | ||
323 | 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, | ||
324 | 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 | ||
325 | }; | ||
326 | static const Int16 SEQ_ML_PREDEF_DIST [NUM_ML_SYMBOLS] = | ||
327 | { | ||
328 | 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, | ||
329 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | ||
330 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1, | ||
331 | -1,-1,-1,-1,-1 | ||
332 | }; | ||
333 | |||
334 | #endif | ||
335 | |||
336 | // typedef int FastInt; | ||
337 | // typedef Int32 FastInt32; | ||
338 | typedef unsigned FastInt; | ||
339 | typedef UInt32 FastInt32; | ||
340 | typedef FastInt32 CFseRecord; | ||
341 | |||
342 | |||
343 | #define FSE_REC_LEN_OFFSET 8 | ||
344 | #define FSE_REC_STATE_OFFSET 16 | ||
345 | #define GET_FSE_REC_SYM(st) ((Byte)(st)) | ||
346 | #define GET_FSE_REC_LEN(st) ((Byte)((st) >> FSE_REC_LEN_OFFSET)) | ||
347 | #define GET_FSE_REC_STATE(st) ((st) >> FSE_REC_STATE_OFFSET) | ||
348 | |||
349 | // #define FSE_REC_SYM_MASK (0xff) | ||
350 | // #define GET_FSE_REC_SYM(st) (st & FSE_REC_SYM_MASK) | ||
351 | |||
352 | #define W_BASE(state, len, sym) \ | ||
353 | (((UInt32)state << (4 + FSE_REC_STATE_OFFSET)) + \ | ||
354 | (len << FSE_REC_LEN_OFFSET) + (sym)) | ||
355 | #define W(state, len, sym) W_BASE(state, len, sym) | ||
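/* Layout note (illustrative) for these predefined records: each CFseRecord packs
     bits  0.. 7 : symbol
     bits  8..15 : number of extra bits to read (nb)
     bits 16..   : the W(state,...) argument stored as (state << 4)
   For example, W(2,5,25) builds a record with symbol 25, nb == 5 and
   GET_FSE_REC_STATE(record) == (2 << 4) == 32, which is the table-index base
   to which the nb freshly read bits are added to select the next record. */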
356 | static const CFseRecord k_PredefRecords_LL[1 << 6] = { | ||
357 | W(0,4, 0),W(1,4, 0),W(2,5, 1),W(0,5, 3),W(0,5, 4),W(0,5, 6),W(0,5, 7),W(0,5, 9), | ||
358 | W(0,5,10),W(0,5,12),W(0,6,14),W(0,5,16),W(0,5,18),W(0,5,19),W(0,5,21),W(0,5,22), | ||
359 | W(0,5,24),W(2,5,25),W(0,5,26),W(0,6,27),W(0,6,29),W(0,6,31),W(2,4, 0),W(0,4, 1), | ||
360 | W(0,5, 2),W(2,5, 4),W(0,5, 5),W(2,5, 7),W(0,5, 8),W(2,5,10),W(0,5,11),W(0,6,13), | ||
361 | W(2,5,16),W(0,5,17),W(2,5,19),W(0,5,20),W(2,5,22),W(0,5,23),W(0,4,25),W(1,4,25), | ||
362 | W(2,5,26),W(0,6,28),W(0,6,30),W(3,4, 0),W(1,4, 1),W(2,5, 2),W(2,5, 3),W(2,5, 5), | ||
363 | W(2,5, 6),W(2,5, 8),W(2,5, 9),W(2,5,11),W(2,5,12),W(0,6,15),W(2,5,17),W(2,5,18), | ||
364 | W(2,5,20),W(2,5,21),W(2,5,23),W(2,5,24),W(0,6,35),W(0,6,34),W(0,6,33),W(0,6,32) | ||
365 | }; | ||
366 | static const CFseRecord k_PredefRecords_OF[1 << 5] = { | ||
367 | W(0,5, 0),W(0,4, 6),W(0,5, 9),W(0,5,15),W(0,5,21),W(0,5, 3),W(0,4, 7),W(0,5,12), | ||
368 | W(0,5,18),W(0,5,23),W(0,5, 5),W(0,4, 8),W(0,5,14),W(0,5,20),W(0,5, 2),W(1,4, 7), | ||
369 | W(0,5,11),W(0,5,17),W(0,5,22),W(0,5, 4),W(1,4, 8),W(0,5,13),W(0,5,19),W(0,5, 1), | ||
370 | W(1,4, 6),W(0,5,10),W(0,5,16),W(0,5,28),W(0,5,27),W(0,5,26),W(0,5,25),W(0,5,24) | ||
371 | }; | ||
372 | #if defined(Z7_ZSTD_DEC_USE_ML_PLUS3) | ||
373 | #undef W | ||
374 | #define W(state, len, sym) W_BASE(state, len, (sym + MATCH_LEN_MIN)) | ||
375 | #endif | ||
376 | static const CFseRecord k_PredefRecords_ML[1 << 6] = { | ||
377 | W(0,6, 0),W(0,4, 1),W(2,5, 2),W(0,5, 3),W(0,5, 5),W(0,5, 6),W(0,5, 8),W(0,6,10), | ||
378 | W(0,6,13),W(0,6,16),W(0,6,19),W(0,6,22),W(0,6,25),W(0,6,28),W(0,6,31),W(0,6,33), | ||
379 | W(0,6,35),W(0,6,37),W(0,6,39),W(0,6,41),W(0,6,43),W(0,6,45),W(1,4, 1),W(0,4, 2), | ||
380 | W(2,5, 3),W(0,5, 4),W(2,5, 6),W(0,5, 7),W(0,6, 9),W(0,6,12),W(0,6,15),W(0,6,18), | ||
381 | W(0,6,21),W(0,6,24),W(0,6,27),W(0,6,30),W(0,6,32),W(0,6,34),W(0,6,36),W(0,6,38), | ||
382 | W(0,6,40),W(0,6,42),W(0,6,44),W(2,4, 1),W(3,4, 1),W(1,4, 2),W(2,5, 4),W(2,5, 5), | ||
383 | W(2,5, 7),W(2,5, 8),W(0,6,11),W(0,6,14),W(0,6,17),W(0,6,20),W(0,6,23),W(0,6,26), | ||
384 | W(0,6,29),W(0,6,52),W(0,6,51),W(0,6,50),W(0,6,49),W(0,6,48),W(0,6,47),W(0,6,46) | ||
385 | }; | ||
386 | |||
387 | |||
388 | // sum of freqs[] must be correct | ||
389 | // (numSyms != 0) | ||
390 | // (accuracy >= 5) | ||
391 | static | ||
392 | Z7_NO_INLINE | ||
393 | // Z7_FORCE_INLINE | ||
394 | void FSE_Generate(CFseRecord *table, | ||
395 | const Int16 *const freqs, const size_t numSyms, | ||
396 | const unsigned accuracy, UInt32 delta) | ||
397 | { | ||
398 | size_t size = (size_t)1 << accuracy; | ||
399 | // max value in states[x] is ((1 << accuracy) * 2) | ||
400 | UInt16 states[FSE_NUM_SYMBOLS_MAX]; | ||
401 | { | ||
402 | /* Symbols with "less than 1" probability get a single cell, | ||
403 | starting from the end of the table. | ||
404 | These symbols define a full state reset, reading (accuracy) bits. */ | ||
405 | size_t threshold = size; | ||
406 | { | ||
407 | size_t s = 0; | ||
408 | do | ||
409 | if (freqs[s] == -1) | ||
410 | { | ||
411 | table[--threshold] = (CFseRecord)s; | ||
412 | states[s] = 1; | ||
413 | } | ||
414 | while (++s != numSyms); | ||
415 | } | ||
416 | |||
417 | #ifdef SHOW_STAT | ||
418 | if (threshold == size) | ||
419 | { | ||
420 | STAT_INC(g_Num_Threshold_0) | ||
421 | STAT_UPDATE(g_Num_Threshold_0sum += (unsigned)size;) | ||
422 | } | ||
423 | else | ||
424 | { | ||
425 | STAT_INC(g_Num_Threshold_1) | ||
426 | STAT_UPDATE(g_Num_Threshold_1sum += (unsigned)size;) | ||
427 | } | ||
428 | #endif | ||
429 | |||
430 | // { unsigned uuu; for (uuu = 0; uuu < 400; uuu++) | ||
431 | { | ||
432 | // Each (symbol) gets freqs[symbol] cells. | ||
433 | // Cell allocation is spread, not linear. | ||
434 | const size_t step = (size >> 1) + (size >> 3) + 3; | ||
435 | size_t pos = 0; | ||
436 | // const unsigned mask = size - 1; | ||
437 | /* | ||
438 | if (threshold == size) | ||
439 | { | ||
440 | size_t s = 0; | ||
441 | size--; | ||
442 | do | ||
443 | { | ||
444 | int freq = freqs[s]; | ||
445 | if (freq <= 0) | ||
446 | continue; | ||
447 | states[s] = (UInt16)freq; | ||
448 | do | ||
449 | { | ||
450 | table[pos] = (CFseRecord)s; | ||
451 | pos = (pos + step) & size; // & mask; | ||
452 | } | ||
453 | while (--freq); | ||
454 | } | ||
455 | while (++s != numSyms); | ||
456 | } | ||
457 | else | ||
458 | */ | ||
459 | { | ||
460 | size_t s = 0; | ||
461 | size--; | ||
462 | do | ||
463 | { | ||
464 | int freq = freqs[s]; | ||
465 | if (freq <= 0) | ||
466 | continue; | ||
467 | states[s] = (UInt16)freq; | ||
468 | do | ||
469 | { | ||
470 | table[pos] = (CFseRecord)s; | ||
471 | // we skip position, if it's already occupied by a "less than 1" probability symbol. | ||
472 | // (step) is coprime to table size, so the cycle will visit each position exactly once | ||
473 | do | ||
474 | pos = (pos + step) & size; // & mask; | ||
475 | while (pos >= threshold); | ||
476 | } | ||
477 | while (--freq); | ||
478 | } | ||
479 | while (++s != numSyms); | ||
480 | } | ||
481 | size++; | ||
482 | // (pos != 0) is an unexpected case that means the freqs[] are not correct. | ||
483 | // so it's some failure in the code (for example, an incorrect predefined freq[] table) | ||
484 | // if (pos != 0) return SZ_ERROR_FAIL; | ||
485 | } | ||
486 | // } | ||
487 | } | ||
488 | { | ||
489 | const CFseRecord * const limit = table + size; | ||
490 | delta = ((UInt32)size << FSE_REC_STATE_OFFSET) - delta; | ||
491 | /* State increases by symbol over time, decreasing number of bits. | ||
492 | Baseline increases until the bit threshold is passed, at which point it resets to 0 */ | ||
493 | do | ||
494 | { | ||
495 | #define TABLE_ITER(a) \ | ||
496 | { \ | ||
497 | const FastInt sym = (FastInt)table[a]; \ | ||
498 | const unsigned nextState = states[sym]; \ | ||
499 | unsigned nb; \ | ||
500 | states[sym] = (UInt16)(nextState + 1); \ | ||
501 | nb = accuracy - GetHighestSetBit_32_nonzero_small(nextState); \ | ||
502 | table[a] = (CFseRecord)(sym - delta \ | ||
503 | + ((UInt32)nb << FSE_REC_LEN_OFFSET) \ | ||
504 | + ((UInt32)nextState << FSE_REC_STATE_OFFSET << nb)); \ | ||
505 | } | ||
506 | TABLE_ITER(0) | ||
507 | TABLE_ITER(1) | ||
508 | table += 2; | ||
509 | } | ||
510 | while (table != limit); | ||
511 | } | ||
512 | } | ||
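/* Spread-step note (illustrative): for accuracy == 6 the table has size == 64 and
     step = (64 >> 1) + (64 >> 3) + 3 = 32 + 8 + 3 = 43.
   43 is odd, hence coprime to the power-of-2 table size, so the sequence
     pos, (pos + 43) & 63, (pos + 86) & 63, ...
   visits all 64 positions exactly once before repeating. The same holds for
   accuracy 5 (step = 16 + 4 + 3 = 23) and accuracy 9 (step = 256 + 64 + 3 = 323). */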
513 | |||
514 | |||
515 | #ifdef Z7_ZSTD_DEC_PRINT_TABLE | ||
516 | |||
517 | static void Print_Predef(unsigned predefAccuracy, | ||
518 | const unsigned numSymsPredef, | ||
519 | const Int16 * const predefFreqs, | ||
520 | const CFseRecord *checkTable) | ||
521 | { | ||
522 | CFseRecord table[1 << 6]; | ||
523 | unsigned i; | ||
524 | FSE_Generate(table, predefFreqs, numSymsPredef, predefAccuracy, | ||
525 | #if defined(Z7_ZSTD_DEC_USE_ML_PLUS3) | ||
526 | numSymsPredef == NUM_ML_SYMBOLS ? MATCH_LEN_MIN : | ||
527 | #endif | ||
528 | 0 | ||
529 | ); | ||
530 | if (memcmp(table, checkTable, sizeof(UInt32) << predefAccuracy) != 0) | ||
531 | exit(1); | ||
532 | for (i = 0; i < (1u << predefAccuracy); i++) | ||
533 | { | ||
534 | const UInt32 v = table[i]; | ||
535 | const unsigned state = (unsigned)(GET_FSE_REC_STATE(v)); | ||
536 | if (state & 0xf) | ||
537 | exit(1); | ||
538 | if (i != 0) | ||
539 | { | ||
540 | printf(","); | ||
541 | if (i % 8 == 0) | ||
542 | printf("\n"); | ||
543 | } | ||
544 | printf("W(%d,%d,%2d)", | ||
545 | (unsigned)(state >> 4), | ||
546 | (unsigned)((v >> FSE_REC_LEN_OFFSET) & 0xff), | ||
547 | (unsigned)GET_FSE_REC_SYM(v)); | ||
548 | } | ||
549 | printf("\n\n"); | ||
550 | } | ||
551 | |||
552 | #endif | ||
553 | |||
554 | |||
555 | #define GET16(dest, p) { const Byte *ptr = p; dest = GetUi16(ptr); } | ||
556 | #define GET32(dest, p) { const Byte *ptr = p; dest = GetUi32(ptr); } | ||
557 | |||
558 | // (1 <= numBits <= 9) | ||
559 | #define FORWARD_READ_BITS(destVal, numBits, mask) \ | ||
560 | { const CBitCtr_signed bos3 = (bitOffset) >> 3; \ | ||
561 | if (bos3 >= 0) return SZ_ERROR_DATA; \ | ||
562 | GET16(destVal, src + bos3) \ | ||
563 | destVal >>= (bitOffset) & 7; \ | ||
564 | bitOffset += (CBitCtr_signed)(numBits); \ | ||
565 | mask = (1u << (numBits)) - 1; \ | ||
566 | destVal &= mask; \ | ||
567 | } | ||
568 | |||
569 | #define FORWARD_READ_1BIT(destVal) \ | ||
570 | { const CBitCtr_signed bos3 = (bitOffset) >> 3; \ | ||
571 | if (bos3 >= 0) return SZ_ERROR_DATA; \ | ||
572 | destVal = *(src + bos3); \ | ||
573 | destVal >>= (bitOffset) & 7; \ | ||
574 | (bitOffset)++; \ | ||
575 | destVal &= 1; \ | ||
576 | } | ||
577 | |||
578 | |||
579 | // in: (accuracyMax <= 9) | ||
580 | // at least 2 bytes will be processed from (in) stream. | ||
581 | // at return: (in->len > 0) | ||
582 | static | ||
583 | Z7_NO_INLINE | ||
584 | SRes FSE_DecodeHeader(CFseRecord *const table, | ||
585 | CInBufPair *const in, | ||
586 | const unsigned accuracyMax, | ||
587 | Byte *const accuracyRes, | ||
588 | unsigned numSymbolsMax) | ||
589 | { | ||
590 | unsigned accuracy; | ||
591 | unsigned remain1; | ||
592 | unsigned syms; | ||
593 | Int16 freqs[FSE_NUM_SYMBOLS_MAX + 3]; // +3 for overwrite (repeat) | ||
594 | const Byte *src = in->ptr; | ||
595 | CBitCtr_signed bitOffset = (CBitCtr_signed)in->len - 1; | ||
596 | if (bitOffset <= 0) | ||
597 | return SZ_ERROR_DATA; | ||
598 | accuracy = *src & 0xf; | ||
599 | accuracy += 5; | ||
600 | if (accuracy > accuracyMax) | ||
601 | return SZ_ERROR_DATA; | ||
602 | *accuracyRes = (Byte)accuracy; | ||
603 | remain1 = (1u << accuracy) + 1; // (it's remain_freqs_sum + 1) | ||
604 | syms = 0; | ||
605 | src += bitOffset; // src points to last byte | ||
606 | bitOffset = 4 - (bitOffset << 3); | ||
607 | |||
608 | for (;;) | ||
609 | { | ||
610 | // (2 <= remain1) | ||
611 | const unsigned bits = GetHighestSetBit_32_nonzero_small((unsigned)remain1); | ||
612 | // (1 <= bits <= accuracy) | ||
613 | unsigned val; // it must be unsigned or int | ||
614 | unsigned mask; | ||
615 | FORWARD_READ_BITS(val, bits, mask) | ||
616 | { | ||
617 | const unsigned val2 = remain1 + val - mask; | ||
618 | if (val2 > mask) | ||
619 | { | ||
620 | unsigned bit; | ||
621 | FORWARD_READ_1BIT(bit) | ||
622 | if (bit) | ||
623 | val = val2; | ||
624 | } | ||
625 | } | ||
626 | { | ||
627 | // (remain1 >= 2) | ||
628 | // (0 <= (int)val <= remain1) | ||
629 | val = (unsigned)((int)val - 1); | ||
630 | // val now is "probability" of symbol | ||
631 | // (probability == -1) means "less than 1" frequency. | ||
632 | // (-1 <= (int)val <= remain1 - 1) | ||
633 | freqs[syms++] = (Int16)(int)val; | ||
634 | if (val != 0) | ||
635 | { | ||
636 | remain1 -= (int)val < 0 ? 1u : (unsigned)val; | ||
637 | // remain1 -= val; | ||
638 | // val >>= (sizeof(val) * 8 - 2); | ||
639 | // remain1 -= val & 2; | ||
640 | // freqs[syms++] = (Int16)(int)val; | ||
641 | // syms++; | ||
642 | if (remain1 == 1) | ||
643 | break; | ||
644 | if (syms >= FSE_NUM_SYMBOLS_MAX) | ||
645 | return SZ_ERROR_DATA; | ||
646 | } | ||
647 | else // if (val == 0) | ||
648 | { | ||
649 | // freqs[syms++] = 0; | ||
650 | // syms++; | ||
651 | for (;;) | ||
652 | { | ||
653 | unsigned repeat; | ||
654 | FORWARD_READ_BITS(repeat, 2, mask) | ||
655 | freqs[syms ] = 0; | ||
656 | freqs[syms + 1] = 0; | ||
657 | freqs[syms + 2] = 0; | ||
658 | syms += repeat; | ||
659 | if (syms >= FSE_NUM_SYMBOLS_MAX) | ||
660 | return SZ_ERROR_DATA; | ||
661 | if (repeat != 3) | ||
662 | break; | ||
663 | } | ||
664 | } | ||
665 | } | ||
666 | } | ||
667 | |||
668 | if (syms > numSymbolsMax) | ||
669 | return SZ_ERROR_DATA; | ||
670 | bitOffset += 7; | ||
671 | bitOffset >>= 3; | ||
672 | if (bitOffset > 0) | ||
673 | return SZ_ERROR_DATA; | ||
674 | in->ptr = src + bitOffset; | ||
675 | in->len = (size_t)(1 - bitOffset); | ||
676 | { | ||
677 | // unsigned uuu; for (uuu = 0; uuu < 50; uuu++) | ||
678 | FSE_Generate(table, freqs, syms, accuracy, | ||
679 | #if defined(Z7_ZSTD_DEC_USE_ML_PLUS3) | ||
680 | numSymbolsMax == NUM_ML_SYMBOLS ? MATCH_LEN_MIN : | ||
681 | #endif | ||
682 | 0 | ||
683 | ); | ||
684 | } | ||
685 | return SZ_OK; | ||
686 | } | ||
687 | |||
688 | |||
689 | // ---------- HUFFMAN ---------- | ||
690 | |||
691 | #define HUF_MAX_BITS 12 | ||
692 | #define HUF_MAX_SYMBS 256 | ||
693 | #define HUF_DUMMY_SIZE (128 + 8 * 2) // it must be a multiple of 8 | ||
694 | // #define HUF_DUMMY_SIZE 0 | ||
695 | #define HUF_TABLE_SIZE ((2 << HUF_MAX_BITS) + HUF_DUMMY_SIZE) | ||
696 | #define HUF_GET_SYMBOLS(table) ((table) + (1 << HUF_MAX_BITS) + HUF_DUMMY_SIZE) | ||
697 | // #define HUF_GET_LENS(table) (table) | ||
698 | |||
699 | typedef struct | ||
700 | { | ||
701 | // Byte table[HUF_TABLE_SIZE]; | ||
702 | UInt64 table64[HUF_TABLE_SIZE / sizeof(UInt64)]; | ||
703 | } | ||
704 | CZstdDecHufTable; | ||
705 | |||
706 | /* | ||
707 | Input: | ||
708 | numSyms != 0 | ||
709 | (bits) array size must be aligned for 2 | ||
710 | if (numSyms & 1), then bits[numSyms] == 0, | ||
711 | Huffman tree must be correct before Huf_Build() call: | ||
712 | (sum (1/2^bits[i]) == 1). | ||
713 | && (bits[i] <= HUF_MAX_BITS) | ||
714 | */ | ||
715 | static | ||
716 | Z7_FORCE_INLINE | ||
717 | void Huf_Build(Byte * const table, | ||
718 | const Byte *bits, const unsigned numSyms) | ||
719 | { | ||
720 | unsigned counts0[HUF_MAX_BITS + 1]; | ||
721 | unsigned counts1[HUF_MAX_BITS + 1]; | ||
722 | const Byte * const bitsEnd = bits + numSyms; | ||
723 | // /* | ||
724 | { | ||
725 | unsigned t; | ||
726 | for (t = 0; t < Z7_ARRAY_SIZE(counts0); t++) counts0[t] = 0; | ||
727 | for (t = 0; t < Z7_ARRAY_SIZE(counts1); t++) counts1[t] = 0; | ||
728 | } | ||
729 | // */ | ||
730 | // memset(counts0, 0, sizeof(counts0)); | ||
731 | // memset(counts1, 0, sizeof(counts1)); | ||
732 | { | ||
733 | const Byte *bits2 = bits; | ||
734 | // we access additional bits[symbol] if (numSyms & 1) | ||
735 | do | ||
736 | { | ||
737 | counts0[bits2[0]]++; | ||
738 | counts1[bits2[1]]++; | ||
739 | } | ||
740 | while ((bits2 += 2) < bitsEnd); | ||
741 | } | ||
742 | { | ||
743 | unsigned r = 0; | ||
744 | unsigned i = HUF_MAX_BITS; | ||
745 | // Byte *lens = HUF_GET_LENS(symbols); | ||
746 | do | ||
747 | { | ||
748 | const unsigned num = (counts0[i] + counts1[i]) << (HUF_MAX_BITS - i); | ||
749 | counts0[i] = r; | ||
750 | if (num) | ||
751 | { | ||
752 | Byte *lens = &table[r]; | ||
753 | r += num; | ||
754 | memset(lens, (int)i, num); | ||
755 | } | ||
756 | } | ||
757 | while (--i); | ||
758 | counts0[0] = 0; // for speculative loads | ||
759 | // no need for check: | ||
760 | // if (r != (UInt32)1 << HUF_MAX_BITS) exit(0); | ||
761 | } | ||
762 | { | ||
763 | #ifdef MY_CPU_64BIT | ||
764 | UInt64 | ||
765 | #else | ||
766 | UInt32 | ||
767 | #endif | ||
768 | v = 0; | ||
769 | Byte *symbols = HUF_GET_SYMBOLS(table); | ||
770 | do | ||
771 | { | ||
772 | const unsigned nb = *bits++; | ||
773 | if (nb) | ||
774 | { | ||
775 | const unsigned code = counts0[nb]; | ||
776 | const unsigned num = (1u << HUF_MAX_BITS) >> nb; | ||
777 | counts0[nb] = code + num; | ||
778 | // memset(&symbols[code], i, num); | ||
779 | // /* | ||
780 | { | ||
781 | Byte *s2 = &symbols[code]; | ||
782 | if (num <= 2) | ||
783 | { | ||
784 | s2[0] = (Byte)v; | ||
785 | s2[(size_t)num - 1] = (Byte)v; | ||
786 | } | ||
787 | else if (num <= 8) | ||
788 | { | ||
789 | *(UInt32 *)(void *)s2 = (UInt32)v; | ||
790 | *(UInt32 *)(void *)(s2 + (size_t)num - 4) = (UInt32)v; | ||
791 | } | ||
792 | else | ||
793 | { | ||
794 | #ifdef MY_CPU_64BIT | ||
795 | UInt64 *s = (UInt64 *)(void *)s2; | ||
796 | const UInt64 *lim = (UInt64 *)(void *)(s2 + num); | ||
797 | do | ||
798 | { | ||
799 | s[0] = v; s[1] = v; s += 2; | ||
800 | } | ||
801 | while (s != lim); | ||
802 | #else | ||
803 | UInt32 *s = (UInt32 *)(void *)s2; | ||
804 | const UInt32 *lim = (const UInt32 *)(const void *)(s2 + num); | ||
805 | do | ||
806 | { | ||
807 | s[0] = v; s[1] = v; s += 2; | ||
808 | s[0] = v; s[1] = v; s += 2; | ||
809 | } | ||
810 | while (s != lim); | ||
811 | #endif | ||
812 | } | ||
813 | } | ||
814 | // */ | ||
815 | } | ||
816 | v += | ||
817 | #ifdef MY_CPU_64BIT | ||
818 | 0x0101010101010101; | ||
819 | #else | ||
820 | 0x01010101; | ||
821 | #endif | ||
822 | } | ||
823 | while (bits != bitsEnd); | ||
824 | } | ||
825 | } | ||
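/* Worked example (illustrative): numSyms == 4, bits[] == { 1, 2, 3, 3 }
   (Kraft sum 1/2 + 1/4 + 1/8 + 1/8 == 1, all lengths <= HUF_MAX_BITS == 12).
   The length part of the table is filled from the longest codes down:
     positions    0..1023 : length 3  (two symbols, 512 cells each)
     positions 1024..2047 : length 2  (1024 cells)
     positions 2048..4095 : length 1  (2048 cells)
   and HUF_GET_SYMBOLS(table) gets the matching symbol byte for every cell, so one
   12-bit lookup state selects both the symbol and how many bits to consume. */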
826 | |||
827 | |||
828 | |||
829 | // how many bytes (src) was moved back from its original value. | ||
830 | // we need (HUF_SRC_OFFSET == 3) for optimized 32-bit memory access | ||
831 | #define HUF_SRC_OFFSET 3 | ||
832 | |||
833 | // v <<= 8 - (bitOffset & 7) + numBits; | ||
834 | // v >>= 32 - HUF_MAX_BITS; | ||
835 | #define HUF_GET_STATE(v, bitOffset, numBits) \ | ||
836 | GET32(v, src + (HUF_SRC_OFFSET - 3) + ((CBitCtr_signed)bitOffset >> 3)) \ | ||
837 | v >>= 32 - HUF_MAX_BITS - 8 + ((unsigned)bitOffset & 7) - numBits; \ | ||
838 | v &= (1u << HUF_MAX_BITS) - 1; \ | ||
839 | |||
840 | |||
841 | #ifndef Z7_ZSTD_DEC_USE_HUF_STREAM1_ALWAYS | ||
842 | #if defined(MY_CPU_AMD64) && defined(_MSC_VER) && _MSC_VER == 1400 \ | ||
843 | || !defined(MY_CPU_X86_OR_AMD64) \ | ||
844 | // || 1 == 1 /* for debug : to force STREAM4_PRELOAD mode */ | ||
845 | // we need big number (>=16) of registers for PRELOAD4 | ||
846 | #define Z7_ZSTD_DEC_USE_HUF_STREAM4_PRELOAD4 | ||
847 | // #define Z7_ZSTD_DEC_USE_HUF_STREAM4_PRELOAD2 // for debug | ||
848 | #endif | ||
849 | #endif | ||
850 | |||
851 | // for debug: simpler and smaller code but slow: | ||
852 | // #define Z7_ZSTD_DEC_USE_HUF_STREAM1_SIMPLE | ||
853 | |||
854 | #if defined(Z7_ZSTD_DEC_USE_HUF_STREAM1_SIMPLE) || \ | ||
855 | !defined(Z7_ZSTD_DEC_USE_HUF_STREAM1_ALWAYS) | ||
856 | |||
857 | #define HUF_DECODE(bitOffset, dest) \ | ||
858 | { \ | ||
859 | UInt32 v; \ | ||
860 | HUF_GET_STATE(v, bitOffset, 0) \ | ||
861 | bitOffset -= table[v]; \ | ||
862 | *(dest) = symbols[v]; \ | ||
863 | if ((CBitCtr_signed)bitOffset < 0) return SZ_ERROR_DATA; \ | ||
864 | } | ||
865 | |||
866 | #endif | ||
867 | |||
868 | #if !defined(Z7_ZSTD_DEC_USE_HUF_STREAM1_SIMPLE) || \ | ||
869 | defined(Z7_ZSTD_DEC_USE_HUF_STREAM4_PRELOAD4) || \ | ||
870 | defined(Z7_ZSTD_DEC_USE_HUF_STREAM4_PRELOAD2) \ | ||
871 | |||
872 | #define HUF_DECODE_2_INIT(v, bitOffset) \ | ||
873 | HUF_GET_STATE(v, bitOffset, 0) | ||
874 | |||
875 | #define HUF_DECODE_2(v, bitOffset, dest) \ | ||
876 | { \ | ||
877 | unsigned numBits; \ | ||
878 | numBits = table[v]; \ | ||
879 | *(dest) = symbols[v]; \ | ||
880 | HUF_GET_STATE(v, bitOffset, numBits) \ | ||
881 | bitOffset -= (CBitCtr)numBits; \ | ||
882 | if ((CBitCtr_signed)bitOffset < 0) return SZ_ERROR_DATA; \ | ||
883 | } | ||
884 | |||
885 | #endif | ||
886 | |||
887 | |||
888 | // src == ptr - HUF_SRC_OFFSET | ||
889 | // we are allowed to access 3 bytes before start of input buffer | ||
890 | static | ||
891 | Z7_NO_INLINE | ||
892 | SRes Huf_Decompress_1stream(const Byte * const table, | ||
893 | const Byte *src, const size_t srcLen, | ||
894 | Byte *dest, const size_t destLen) | ||
895 | { | ||
896 | CBitCtr bitOffset; | ||
897 | if (srcLen == 0) | ||
898 | return SZ_ERROR_DATA; | ||
899 | SET_bitOffset_TO_PAD (bitOffset, src + HUF_SRC_OFFSET, srcLen) | ||
900 | if (destLen) | ||
901 | { | ||
902 | const Byte *symbols = HUF_GET_SYMBOLS(table); | ||
903 | const Byte *destLim = dest + destLen; | ||
904 | #ifdef Z7_ZSTD_DEC_USE_HUF_STREAM1_SIMPLE | ||
905 | { | ||
906 | do | ||
907 | { | ||
908 | HUF_DECODE (bitOffset, dest) | ||
909 | } | ||
910 | while (++dest != destLim); | ||
911 | } | ||
912 | #else | ||
913 | { | ||
914 | UInt32 v; | ||
915 | HUF_DECODE_2_INIT (v, bitOffset) | ||
916 | do | ||
917 | { | ||
918 | HUF_DECODE_2 (v, bitOffset, dest) | ||
919 | } | ||
920 | while (++dest != destLim); | ||
921 | } | ||
922 | #endif | ||
923 | } | ||
924 | return bitOffset == 0 ? SZ_OK : SZ_ERROR_DATA; | ||
925 | } | ||
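/* 4-stream layout note (illustrative): in the 4-stream variant below, the compressed
   literals start with a 6-byte jump table holding the byte sizes of streams 0..2;
   stream 3 occupies the rest of the input. Every stream decodes
     delta = (destLen + 3) / 4
   output bytes, except the last one, which decodes the remainder. For example,
   destLen == 10 gives delta == 3, so the four streams produce 3 + 3 + 3 + 1 bytes. */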
926 | |||
927 | |||
928 | // for debug : it reduces register pressure, but the array copy can be slow: | ||
929 | // #define Z7_ZSTD_DEC_USE_HUF_LOCAL | ||
930 | |||
931 | // src == ptr + (6 - HUF_SRC_OFFSET) | ||
932 | // srcLen >= 10 | ||
933 | // we are allowed to access 3 bytes before start of input buffer | ||
934 | static | ||
935 | Z7_NO_INLINE | ||
936 | SRes Huf_Decompress_4stream(const Byte * const | ||
937 | #ifdef Z7_ZSTD_DEC_USE_HUF_LOCAL | ||
938 | table2, | ||
939 | #else | ||
940 | table, | ||
941 | #endif | ||
942 | const Byte *src, size_t srcLen, | ||
943 | Byte *dest, size_t destLen) | ||
944 | { | ||
945 | #ifdef Z7_ZSTD_DEC_USE_HUF_LOCAL | ||
946 | Byte table[HUF_TABLE_SIZE]; | ||
947 | #endif | ||
948 | UInt32 sizes[3]; | ||
949 | const size_t delta = (destLen + 3) / 4; | ||
950 | if ((sizes[0] = GetUi16(src + (0 + HUF_SRC_OFFSET - 6))) == 0) return SZ_ERROR_DATA; | ||
951 | if ((sizes[1] = GetUi16(src + (2 + HUF_SRC_OFFSET - 6))) == 0) return SZ_ERROR_DATA; | ||
952 | sizes[1] += sizes[0]; | ||
953 | if ((sizes[2] = GetUi16(src + (4 + HUF_SRC_OFFSET - 6))) == 0) return SZ_ERROR_DATA; | ||
954 | sizes[2] += sizes[1]; | ||
955 | srcLen -= 6; | ||
956 | if (srcLen <= sizes[2]) | ||
957 | return SZ_ERROR_DATA; | ||
958 | |||
959 | #ifdef Z7_ZSTD_DEC_USE_HUF_LOCAL | ||
960 | { | ||
961 | // unsigned i = 0; for(; i < 1000; i++) | ||
962 | memcpy(table, table2, HUF_TABLE_SIZE); | ||
963 | } | ||
964 | #endif | ||
965 | |||
966 | #ifndef Z7_ZSTD_DEC_USE_HUF_STREAM1_ALWAYS | ||
967 | { | ||
968 | CBitCtr bitOffset_0, | ||
969 | bitOffset_1, | ||
970 | bitOffset_2, | ||
971 | bitOffset_3; | ||
972 | { | ||
973 | SET_bitOffset_TO_PAD_and_SET_BIT_SIZE (bitOffset_0, src + HUF_SRC_OFFSET, sizes[0]) | ||
974 | SET_bitOffset_TO_PAD_and_SET_BIT_SIZE (bitOffset_1, src + HUF_SRC_OFFSET, sizes[1]) | ||
975 | SET_bitOffset_TO_PAD_and_SET_BIT_SIZE (bitOffset_2, src + HUF_SRC_OFFSET, sizes[2]) | ||
976 | SET_bitOffset_TO_PAD (bitOffset_3, src + HUF_SRC_OFFSET, srcLen) | ||
977 | } | ||
978 | { | ||
979 | const Byte * const symbols = HUF_GET_SYMBOLS(table); | ||
980 | Byte *destLim = dest + destLen - delta * 3; | ||
981 | |||
982 | if (dest != destLim) | ||
983 | #ifdef Z7_ZSTD_DEC_USE_HUF_STREAM4_PRELOAD4 | ||
984 | { | ||
985 | UInt32 v_0, v_1, v_2, v_3; | ||
986 | HUF_DECODE_2_INIT (v_0, bitOffset_0) | ||
987 | HUF_DECODE_2_INIT (v_1, bitOffset_1) | ||
988 | HUF_DECODE_2_INIT (v_2, bitOffset_2) | ||
989 | HUF_DECODE_2_INIT (v_3, bitOffset_3) | ||
990 | // #define HUF_DELTA (1 << 17) / 4 | ||
991 | do | ||
992 | { | ||
993 | HUF_DECODE_2 (v_3, bitOffset_3, dest + delta * 3) | ||
994 | HUF_DECODE_2 (v_2, bitOffset_2, dest + delta * 2) | ||
995 | HUF_DECODE_2 (v_1, bitOffset_1, dest + delta) | ||
996 | HUF_DECODE_2 (v_0, bitOffset_0, dest) | ||
997 | } | ||
998 | while (++dest != destLim); | ||
999 | /* | ||
1000 | {// unsigned y = 0; for (;y < 1; y++) | ||
1001 | { | ||
1002 | const size_t num = destLen - delta * 3; | ||
1003 | Byte *orig = dest - num; | ||
1004 | memmove (orig + delta , orig + HUF_DELTA, num); | ||
1005 | memmove (orig + delta * 2, orig + HUF_DELTA * 2, num); | ||
1006 | memmove (orig + delta * 3, orig + HUF_DELTA * 3, num); | ||
1007 | }} | ||
1008 | */ | ||
1009 | } | ||
1010 | #elif defined(Z7_ZSTD_DEC_USE_HUF_STREAM4_PRELOAD2) | ||
1011 | { | ||
1012 | UInt32 v_0, v_1, v_2, v_3; | ||
1013 | HUF_DECODE_2_INIT (v_0, bitOffset_0) | ||
1014 | HUF_DECODE_2_INIT (v_1, bitOffset_1) | ||
1015 | do | ||
1016 | { | ||
1017 | HUF_DECODE_2 (v_0, bitOffset_0, dest) | ||
1018 | HUF_DECODE_2 (v_1, bitOffset_1, dest + delta) | ||
1019 | } | ||
1020 | while (++dest != destLim); | ||
1021 | dest = destLim - (destLen - delta * 3); | ||
1022 | dest += delta * 2; | ||
1023 | destLim += delta * 2; | ||
1024 | HUF_DECODE_2_INIT (v_2, bitOffset_2) | ||
1025 | HUF_DECODE_2_INIT (v_3, bitOffset_3) | ||
1026 | do | ||
1027 | { | ||
1028 | HUF_DECODE_2 (v_2, bitOffset_2, dest) | ||
1029 | HUF_DECODE_2 (v_3, bitOffset_3, dest + delta) | ||
1030 | } | ||
1031 | while (++dest != destLim); | ||
1032 | dest -= delta * 2; | ||
1033 | destLim -= delta * 2; | ||
1034 | } | ||
1035 | #else | ||
1036 | { | ||
1037 | do | ||
1038 | { | ||
1039 | HUF_DECODE (bitOffset_3, dest + delta * 3) | ||
1040 | HUF_DECODE (bitOffset_2, dest + delta * 2) | ||
1041 | HUF_DECODE (bitOffset_1, dest + delta) | ||
1042 | HUF_DECODE (bitOffset_0, dest) | ||
1043 | } | ||
1044 | while (++dest != destLim); | ||
1045 | } | ||
1046 | #endif | ||
1047 | |||
1048 | if (bitOffset_3 != (CBitCtr)sizes[2]) | ||
1049 | return SZ_ERROR_DATA; | ||
1050 | if (destLen &= 3) | ||
1051 | { | ||
1052 | destLim = dest + 4 - destLen; | ||
1053 | do | ||
1054 | { | ||
1055 | HUF_DECODE (bitOffset_2, dest + delta * 2) | ||
1056 | HUF_DECODE (bitOffset_1, dest + delta) | ||
1057 | HUF_DECODE (bitOffset_0, dest) | ||
1058 | } | ||
1059 | while (++dest != destLim); | ||
1060 | } | ||
1061 | if ( bitOffset_0 != 0 | ||
1062 | || bitOffset_1 != (CBitCtr)sizes[0] | ||
1063 | || bitOffset_2 != (CBitCtr)sizes[1]) | ||
1064 | return SZ_ERROR_DATA; | ||
1065 | } | ||
1066 | } | ||
1067 | #else // Z7_ZSTD_DEC_USE_HUF_STREAM1_ALWAYS | ||
1068 | { | ||
1069 | unsigned i; | ||
1070 | for (i = 0; i < 4; i++) | ||
1071 | { | ||
1072 | size_t d = destLen; | ||
1073 | size_t size = srcLen; | ||
1074 | if (i != 3) | ||
1075 | { | ||
1076 | d = delta; | ||
1077 | size = sizes[i]; | ||
1078 | } | ||
1079 | if (i != 0) | ||
1080 | size -= sizes[i - 1]; | ||
1081 | destLen -= d; | ||
1082 | RINOK(Huf_Decompress_1stream(table, src, size, dest, d)) | ||
1083 | dest += d; | ||
1084 | src += size; | ||
1085 | } | ||
1086 | } | ||
1087 | #endif | ||
1088 | |||
1089 | return SZ_OK; | ||
1090 | } | ||
1091 | |||
1092 | |||
1093 | |||
1094 | // (in->len != 0) | ||
1095 | // we are allowed to access in->ptr[-3] | ||
1096 | // at least 2 bytes in (in->ptr) will be processed | ||
1097 | static SRes Huf_DecodeTable(CZstdDecHufTable *const p, CInBufPair *const in) | ||
1098 | { | ||
1099 | Byte weights[HUF_MAX_SYMBS + 1]; // +1 for extra write for loop unroll | ||
1100 | unsigned numSyms; | ||
1101 | const unsigned header = *(in->ptr)++; | ||
1102 | in->len--; | ||
1103 | // memset(weights, 0, sizeof(weights)); | ||
1104 | if (header >= 128) | ||
1105 | { | ||
1106 | // direct representation: 4 bits field (0-15) per weight | ||
1107 | numSyms = header - 127; | ||
1108 | // numSyms != 0 | ||
1109 | { | ||
1110 | const size_t numBytes = (numSyms + 1) / 2; | ||
1111 | const Byte *const ws = in->ptr; | ||
1112 | size_t i = 0; | ||
1113 | if (in->len < numBytes) | ||
1114 | return SZ_ERROR_DATA; | ||
1115 | in->ptr += numBytes; | ||
1116 | in->len -= numBytes; | ||
1117 | do | ||
1118 | { | ||
1119 | const unsigned b = ws[i]; | ||
1120 | weights[i * 2 ] = (Byte)(b >> 4); | ||
1121 | weights[i * 2 + 1] = (Byte)(b & 0xf); | ||
1122 | } | ||
1123 | while (++i != numBytes); | ||
1124 | /* 7ZIP: we can restore correct zero value for weights[numSyms], | ||
1125 | if we want to use zero values starting from numSyms in code below. */ | ||
1126 | // weights[numSyms] = 0; | ||
1127 | } | ||
1128 | } | ||
1129 | else | ||
1130 | { | ||
1131 | #define MAX_ACCURACY_LOG_FOR_WEIGHTS 6 | ||
1132 | CFseRecord table[1 << MAX_ACCURACY_LOG_FOR_WEIGHTS]; | ||
1133 | |||
1134 | Byte accuracy; | ||
1135 | const Byte *src; | ||
1136 | size_t srcLen; | ||
1137 | if (in->len < header) | ||
1138 | return SZ_ERROR_DATA; | ||
1139 | { | ||
1140 | CInBufPair fse_stream; | ||
1141 | fse_stream.len = header; | ||
1142 | fse_stream.ptr = in->ptr; | ||
1143 | in->ptr += header; | ||
1144 | in->len -= header; | ||
1145 | RINOK(FSE_DecodeHeader(table, &fse_stream, | ||
1146 | MAX_ACCURACY_LOG_FOR_WEIGHTS, | ||
1147 | &accuracy, | ||
1148 | 16 // num weight symbols max (max-symbol is 15) | ||
1149 | )) | ||
1150 | // at least 2 bytes were processed in fse_stream. | ||
1151 | // (srcLen > 0) after FSE_DecodeHeader() | ||
1152 | // if (srcLen == 0) return SZ_ERROR_DATA; | ||
1153 | src = fse_stream.ptr; | ||
1154 | srcLen = fse_stream.len; | ||
1155 | } | ||
1156 | // we are allowed to access src[-5] | ||
1157 | { | ||
1158 | // unsigned yyy = 200; do { | ||
1159 | CBitCtr bitOffset; | ||
1160 | FastInt32 state1, state2; | ||
1161 | SET_bitOffset_TO_PAD (bitOffset, src, srcLen) | ||
1162 | state1 = accuracy; | ||
1163 | src -= state1 >> 2; // src -= 1; // for GET16() optimization | ||
1164 | state1 <<= FSE_REC_LEN_OFFSET; | ||
1165 | state2 = state1; | ||
1166 | numSyms = 0; | ||
1167 | for (;;) | ||
1168 | { | ||
1169 | #define FSE_WEIGHT_DECODE(st) \ | ||
1170 | { \ | ||
1171 | const unsigned bits = GET_FSE_REC_LEN(st); \ | ||
1172 | FastInt r; \ | ||
1173 | GET16(r, src + (bitOffset >> 3)) \ | ||
1174 | r >>= (unsigned)bitOffset & 7; \ | ||
1175 | if ((CBitCtr_signed)(bitOffset -= (CBitCtr)bits) < 0) \ | ||
1176 | { if (bitOffset + (CBitCtr)bits != 0) \ | ||
1177 | return SZ_ERROR_DATA; \ | ||
1178 | break; } \ | ||
1179 | r &= 0xff; \ | ||
1180 | r >>= 8 - bits; \ | ||
1181 | st = table[GET_FSE_REC_STATE(st) + r]; \ | ||
1182 | weights[numSyms++] = (Byte)GET_FSE_REC_SYM(st); \ | ||
1183 | } | ||
1184 | FSE_WEIGHT_DECODE (state1) | ||
1185 | FSE_WEIGHT_DECODE (state2) | ||
1186 | if (numSyms == HUF_MAX_SYMBS) | ||
1187 | return SZ_ERROR_DATA; | ||
1188 | } | ||
1189 | // src += (unsigned)accuracy >> 2; } while (--yyy); | ||
1190 | } | ||
1191 | } | ||
1192 | |||
1193 | // Build using weights: | ||
1194 | { | ||
1195 | UInt32 sum = 0; | ||
1196 | { | ||
1197 | // numSyms >= 1 | ||
1198 | unsigned i = 0; | ||
1199 | weights[numSyms] = 0; | ||
1200 | do | ||
1201 | { | ||
1202 | sum += ((UInt32)1 << weights[i ]) & ~(UInt32)1; | ||
1203 | sum += ((UInt32)1 << weights[i + 1]) & ~(UInt32)1; | ||
1204 | i += 2; | ||
1205 | } | ||
1206 | while (i < numSyms); | ||
1207 | if (sum == 0) | ||
1208 | return SZ_ERROR_DATA; | ||
1209 | } | ||
1210 | { | ||
1211 | const unsigned maxBits = GetHighestSetBit_32_nonzero_big(sum) + 1; | ||
1212 | { | ||
1213 | const UInt32 left = ((UInt32)1 << maxBits) - sum; | ||
1214 | // (left != 0) | ||
1215 | // (left) must be power of 2 in correct stream | ||
1216 | if (left & (left - 1)) | ||
1217 | return SZ_ERROR_DATA; | ||
1218 | weights[numSyms++] = (Byte)GetHighestSetBit_32_nonzero_big(left); | ||
1219 | } | ||
1220 | // if (numSyms & 1) | ||
1221 | weights[numSyms] = 0; // for loop unroll | ||
1222 | // numSyms >= 2 | ||
1223 | { | ||
1224 | unsigned i = 0; | ||
1225 | do | ||
1226 | { | ||
1227 | /* | ||
1228 | #define WEIGHT_ITER(a) \ | ||
1229 | { unsigned w = weights[i + (a)]; \ | ||
1230 | const unsigned t = maxBits - w; \ | ||
1231 | w = w ? t: w; \ | ||
1232 | if (w > HUF_MAX_BITS) return SZ_ERROR_DATA; \ | ||
1233 | weights[i + (a)] = (Byte)w; } | ||
1234 | */ | ||
1235 | // /* | ||
1236 | #define WEIGHT_ITER(a) \ | ||
1237 | { unsigned w = weights[i + (a)]; \ | ||
1238 | if (w) { \ | ||
1239 | w = maxBits - w; \ | ||
1240 | if (w > HUF_MAX_BITS) return SZ_ERROR_DATA; \ | ||
1241 | weights[i + (a)] = (Byte)w; }} | ||
1242 | // */ | ||
1243 | WEIGHT_ITER(0) | ||
1244 | // WEIGHT_ITER(1) | ||
1245 | // i += 2; | ||
1246 | } | ||
1247 | while (++i != numSyms); | ||
1248 | } | ||
1249 | } | ||
1250 | } | ||
1251 | { | ||
1252 | // unsigned yyy; for (yyy = 0; yyy < 100; yyy++) | ||
1253 | Huf_Build((Byte *)(void *)p->table64, weights, numSyms); | ||
1254 | } | ||
1255 | return SZ_OK; | ||
1256 | } | ||
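/* Weight-conversion example (illustrative): suppose the decoded weights are
   { 4, 3, 2, 0, 1 }. Then
     sum     = 16 + 8 + 4 + 0 + 2 = 30     (each nonzero weight w adds (1 << w))
     maxBits = highest_set_bit(30) + 1 = 5
     left    = (1 << 5) - 30 = 2           (must be a power of 2)
   so the implied last symbol gets weight highest_set_bit(2) == 1, and nonzero
   weights become code lengths (maxBits - w): { 1, 2, 3, 0, 4, 4 }, whose Kraft sum
   1/2 + 1/4 + 1/8 + 1/16 + 1/16 == 1 feeds the Huf_Build() call above. */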
1257 | |||
1258 | |||
1259 | typedef enum | ||
1260 | { | ||
1261 | k_SeqMode_Predef = 0, | ||
1262 | k_SeqMode_RLE = 1, | ||
1263 | k_SeqMode_FSE = 2, | ||
1264 | k_SeqMode_Repeat = 3 | ||
1265 | } | ||
1266 | z7_zstd_enum_SeqMode; | ||
1267 | |||
1268 | // predefAccuracy == 5 for OFFSET symbols | ||
1269 | // predefAccuracy == 6 for MATCH/LIT LEN symbols | ||
1270 | static | ||
1271 | SRes | ||
1272 | Z7_NO_INLINE | ||
1273 | // Z7_FORCE_INLINE | ||
1274 | FSE_Decode_SeqTable(CFseRecord * const table, | ||
1275 | CInBufPair * const in, | ||
1276 | unsigned predefAccuracy, | ||
1277 | Byte * const accuracyRes, | ||
1278 | unsigned numSymbolsMax, | ||
1279 | const CFseRecord * const predefs, | ||
1280 | const unsigned seqMode) | ||
1281 | { | ||
1282 | // UNUSED_VAR(numSymsPredef) | ||
1283 | // UNUSED_VAR(predefFreqs) | ||
1284 | if (seqMode == k_SeqMode_FSE) | ||
1285 | { | ||
1286 | // unsigned y = 50; CInBufPair in2 = *in; do { *in = in2; RINOK( | ||
1287 | return | ||
1288 | FSE_DecodeHeader(table, in, | ||
1289 | predefAccuracy + 3, // accuracyMax | ||
1290 | accuracyRes, | ||
1291 | numSymbolsMax) | ||
1292 | ; | ||
1293 | // )} while (--y); return SZ_OK; | ||
1294 | } | ||
1295 | // numSymsMax = numSymsPredef + ((predefAccuracy & 1) * (32 - 29))); // numSymsMax | ||
1296 | // numSymsMax == 32 for offsets | ||
1297 | |||
1298 | if (seqMode == k_SeqMode_Predef) | ||
1299 | { | ||
1300 | *accuracyRes = (Byte)predefAccuracy; | ||
1301 | memcpy(table, predefs, sizeof(UInt32) << predefAccuracy); | ||
1302 | return SZ_OK; | ||
1303 | } | ||
1304 | |||
1305 | // (seqMode == k_SeqMode_RLE) | ||
1306 | if (in->len == 0) | ||
1307 | return SZ_ERROR_DATA; | ||
1308 | in->len--; | ||
1309 | { | ||
1310 | const Byte *ptr = in->ptr; | ||
1311 | const Byte sym = ptr[0]; | ||
1312 | in->ptr = ptr + 1; | ||
1313 | table[0] = (FastInt32)sym | ||
1314 | #if defined(Z7_ZSTD_DEC_USE_ML_PLUS3) | ||
1315 | + (numSymbolsMax == NUM_ML_SYMBOLS ? MATCH_LEN_MIN : 0) | ||
1316 | #endif | ||
1317 | ; | ||
1318 | *accuracyRes = 0; | ||
1319 | } | ||
1320 | return SZ_OK; | ||
1321 | } | ||
1322 | |||
1323 | |||
1324 | typedef struct | ||
1325 | { | ||
1326 | CFseRecord of[1 << 8]; | ||
1327 | CFseRecord ll[1 << 9]; | ||
1328 | CFseRecord ml[1 << 9]; | ||
1329 | } | ||
1330 | CZstdDecFseTables; | ||
1331 | |||
1332 | |||
1333 | typedef struct | ||
1334 | { | ||
1335 | Byte *win; | ||
1336 | SizeT cycSize; | ||
1337 | /* | ||
1338 | if (outBuf_fromCaller) : cycSize = outBufSize_fromCaller | ||
1339 | else { | ||
1340 | if ( isCyclicMode) : cycSize = cyclic_buffer_size = (winSize + extra_space) | ||
1341 | if (!isCyclicMode) : cycSize = ContentSize, | ||
1342 | (isCyclicMode == true) if (ContentSize >= winSize) or ContentSize is unknown | ||
1343 | } | ||
1344 | */ | ||
1345 | SizeT winPos; | ||
1346 | |||
1347 | CZstdDecOffset reps[3]; | ||
1348 | |||
1349 | Byte ll_accuracy; | ||
1350 | Byte of_accuracy; | ||
1351 | Byte ml_accuracy; | ||
1352 | // Byte seqTables_wereSet; | ||
1353 | Byte litHuf_wasSet; | ||
1354 | |||
1355 | Byte *literalsBase; | ||
1356 | |||
1357 | size_t winSize; // from header | ||
1358 | size_t totalOutCheck; // totalOutCheck <= winSize | ||
1359 | |||
1360 | #ifdef Z7_ZSTD_DEC_USE_BASES_IN_OBJECT | ||
1361 | SEQ_EXTRA_TABLES(m_) | ||
1362 | #endif | ||
1363 | // UInt64 _pad_Alignment; // is not required now | ||
1364 | CZstdDecFseTables fse; | ||
1365 | CZstdDecHufTable huf; | ||
1366 | } | ||
1367 | CZstdDec1; | ||
1368 | |||
1369 | #define ZstdDec1_GET_BLOCK_SIZE_LIMIT(p) \ | ||
1370 | ((p)->winSize < kBlockSizeMax ? (UInt32)(p)->winSize : kBlockSizeMax) | ||
1371 | |||
1372 | #define SEQ_TABLES_WERE_NOT_SET_ml_accuracy 1 // accuracy=1 is not used by zstd | ||
1373 | #define IS_SEQ_TABLES_WERE_SET(p) (((p)->ml_accuracy != SEQ_TABLES_WERE_NOT_SET_ml_accuracy)) | ||
1374 | // #define IS_SEQ_TABLES_WERE_SET(p) ((p)->seqTables_wereSet) | ||
1375 | |||
1376 | |||
1377 | static void ZstdDec1_Construct(CZstdDec1 *p) | ||
1378 | { | ||
1379 | #ifdef Z7_ZSTD_DEC_PRINT_TABLE | ||
1380 | Print_Predef(6, NUM_LL_SYMBOLS, SEQ_LL_PREDEF_DIST, k_PredefRecords_LL); | ||
1381 | Print_Predef(5, NUM_OFFSET_SYMBOLS_PREDEF, SEQ_OFFSET_PREDEF_DIST, k_PredefRecords_OF); | ||
1382 | Print_Predef(6, NUM_ML_SYMBOLS, SEQ_ML_PREDEF_DIST, k_PredefRecords_ML); | ||
1383 | #endif | ||
1384 | |||
1385 | p->win = NULL; | ||
1386 | p->cycSize = 0; | ||
1387 | p->literalsBase = NULL; | ||
1388 | #ifdef Z7_ZSTD_DEC_USE_BASES_IN_OBJECT | ||
1389 | FILL_LOC_BASES_ALL | ||
1390 | #endif | ||
1391 | } | ||
1392 | |||
1393 | |||
1394 | static void ZstdDec1_Init(CZstdDec1 *p) | ||
1395 | { | ||
1396 | p->reps[0] = 1; | ||
1397 | p->reps[1] = 4; | ||
1398 | p->reps[2] = 8; | ||
1399 | // p->seqTables_wereSet = False; | ||
1400 | p->ml_accuracy = SEQ_TABLES_WERE_NOT_SET_ml_accuracy; | ||
1401 | p->litHuf_wasSet = False; | ||
1402 | p->totalOutCheck = 0; | ||
1403 | } | ||
1404 | |||
1405 | |||
1406 | |||
1407 | #ifdef MY_CPU_LE_UNALIGN | ||
1408 | #define Z7_ZSTD_DEC_USE_UNALIGNED_COPY | ||
1409 | #endif | ||
1410 | |||
1411 | #ifdef Z7_ZSTD_DEC_USE_UNALIGNED_COPY | ||
1412 | |||
1413 | #define COPY_CHUNK_SIZE 16 | ||
1414 | |||
1415 | #define COPY_CHUNK_4_2(dest, src) \ | ||
1416 | { \ | ||
1417 | ((UInt32 *)(void *)dest)[0] = ((const UInt32 *)(const void *)src)[0]; \ | ||
1418 | ((UInt32 *)(void *)dest)[1] = ((const UInt32 *)(const void *)src)[1]; \ | ||
1419 | src += 4 * 2; \ | ||
1420 | dest += 4 * 2; \ | ||
1421 | } | ||
1422 | |||
1423 | /* sse2 doesn't help here in GCC and CLANG. | ||
1424 | so we disabled sse2 here */ | ||
1425 | /* | ||
1426 | #if defined(MY_CPU_AMD64) | ||
1427 | #define Z7_ZSTD_DEC_USE_SSE2 | ||
1428 | #elif defined(MY_CPU_X86) | ||
1429 | #if defined(_MSC_VER) && _MSC_VER >= 1300 && defined(_M_IX86_FP) && (_M_IX86_FP >= 2) \ | ||
1430 | || defined(__SSE2__) \ | ||
1431 | // || 1 == 1 // for debug only | ||
1432 | #define Z7_ZSTD_DEC_USE_SSE2 | ||
1433 | #endif | ||
1434 | #endif | ||
1435 | */ | ||
1436 | |||
1437 | #if defined(MY_CPU_ARM64) | ||
1438 | #define COPY_OFFSET_MIN 16 | ||
1439 | #define COPY_CHUNK1(dest, src) \ | ||
1440 | { \ | ||
1441 | vst1q_u8((uint8_t *)(void *)dest, \ | ||
1442 | vld1q_u8((const uint8_t *)(const void *)src)); \ | ||
1443 | src += 16; \ | ||
1444 | dest += 16; \ | ||
1445 | } | ||
1446 | |||
1447 | #define COPY_CHUNK(dest, src) \ | ||
1448 | { \ | ||
1449 | COPY_CHUNK1(dest, src) \ | ||
1450 | if ((len -= COPY_CHUNK_SIZE) == 0) break; \ | ||
1451 | COPY_CHUNK1(dest, src) \ | ||
1452 | } | ||
1453 | |||
1454 | #elif defined(Z7_ZSTD_DEC_USE_SSE2) | ||
1455 | #include <emmintrin.h> // sse2 | ||
1456 | #define COPY_OFFSET_MIN 16 | ||
1457 | |||
1458 | #define COPY_CHUNK1(dest, src) \ | ||
1459 | { \ | ||
1460 | _mm_storeu_si128((__m128i *)(void *)dest, \ | ||
1461 | _mm_loadu_si128((const __m128i *)(const void *)src)); \ | ||
1462 | src += 16; \ | ||
1463 | dest += 16; \ | ||
1464 | } | ||
1465 | |||
1466 | #define COPY_CHUNK(dest, src) \ | ||
1467 | { \ | ||
1468 | COPY_CHUNK1(dest, src) \ | ||
1469 | if ((len -= COPY_CHUNK_SIZE) == 0) break; \ | ||
1470 | COPY_CHUNK1(dest, src) \ | ||
1471 | } | ||
1472 | |||
1473 | #elif defined(MY_CPU_64BIT) | ||
1474 | #define COPY_OFFSET_MIN 8 | ||
1475 | |||
1476 | #define COPY_CHUNK(dest, src) \ | ||
1477 | { \ | ||
1478 | ((UInt64 *)(void *)dest)[0] = ((const UInt64 *)(const void *)src)[0]; \ | ||
1479 | ((UInt64 *)(void *)dest)[1] = ((const UInt64 *)(const void *)src)[1]; \ | ||
1480 | src += 8 * 2; \ | ||
1481 | dest += 8 * 2; \ | ||
1482 | } | ||
1483 | |||
1484 | #else | ||
1485 | #define COPY_OFFSET_MIN 4 | ||
1486 | |||
1487 | #define COPY_CHUNK(dest, src) \ | ||
1488 | { \ | ||
1489 | COPY_CHUNK_4_2(dest, src); \ | ||
1490 | COPY_CHUNK_4_2(dest, src); \ | ||
1491 | } | ||
1492 | |||
1493 | #endif | ||
1494 | #endif | ||
1495 | |||
1496 | |||
1497 | #ifndef COPY_CHUNK_SIZE | ||
1498 | #define COPY_OFFSET_MIN 4 | ||
1499 | #define COPY_CHUNK_SIZE 8 | ||
1500 | #define COPY_CHUNK_2(dest, src) \ | ||
1501 | { \ | ||
1502 | const Byte a0 = src[0]; \ | ||
1503 | const Byte a1 = src[1]; \ | ||
1504 | dest[0] = a0; \ | ||
1505 | dest[1] = a1; \ | ||
1506 | src += 2; \ | ||
1507 | dest += 2; \ | ||
1508 | } | ||
1509 | #define COPY_CHUNK(dest, src) \ | ||
1510 | { \ | ||
1511 | COPY_CHUNK_2(dest, src) \ | ||
1512 | COPY_CHUNK_2(dest, src) \ | ||
1513 | COPY_CHUNK_2(dest, src) \ | ||
1514 | COPY_CHUNK_2(dest, src) \ | ||
1515 | } | ||
1516 | #endif | ||
1517 | |||
1518 | |||
1519 | #define COPY_PREPARE \ | ||
1520 | len += (COPY_CHUNK_SIZE - 1); \ | ||
1521 | len &= ~(size_t)(COPY_CHUNK_SIZE - 1); \ | ||
1522 | { if (len > rem) \ | ||
1523 | { len = rem; \ | ||
1524 | rem &= (COPY_CHUNK_SIZE - 1); \ | ||
1525 | if (rem) { \ | ||
1526 | len -= rem; \ | ||
1527 | Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \ | ||
1528 | do *dest++ = *src++; while (--rem); \ | ||
1529 | if (len == 0) return; }}} | ||
1530 | |||
1531 | #define COPY_CHUNKS \ | ||
1532 | { \ | ||
1533 | Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE \ | ||
1534 | do { COPY_CHUNK(dest, src) } \ | ||
1535 | while (len -= COPY_CHUNK_SIZE); \ | ||
1536 | } | ||
1537 | |||
1538 | // (len != 0) | ||
1539 | // (len <= rem) | ||
1540 | static | ||
1541 | Z7_FORCE_INLINE | ||
1542 | // Z7_ATTRIB_NO_VECTOR | ||
1543 | void CopyLiterals(Byte *dest, Byte const *src, size_t len, size_t rem) | ||
1544 | { | ||
1545 | COPY_PREPARE | ||
1546 | COPY_CHUNKS | ||
1547 | } | ||
1548 | |||
1549 | |||
1550 | /* we can define Z7_STD_DEC_USE_AFTER_CYC_BUF, if we want to use additional | ||
1551 | space after cycSize to reduce the code in CopyMatch(): */ | ||
1552 | // for debug: | ||
1553 | // #define Z7_STD_DEC_USE_AFTER_CYC_BUF | ||
1554 | |||
1555 | /* | ||
1556 | CopyMatch() | ||
1557 | if wrap (offset > winPos) | ||
1558 | { | ||
1559 | then we have at least (COPY_CHUNK_SIZE) bytes available in (dest) before we overwrite (src): | ||
1560 | (cycSize >= offset + COPY_CHUNK_SIZE) | ||
1561 | if defined(Z7_STD_DEC_USE_AFTER_CYC_BUF) | ||
1562 | we are allowed to read win[cycSize + COPY_CHUNK_SIZE - 1], | ||
1563 | } | ||
1564 | (len != 0) | ||
1565 | */ | ||
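/* Wrap-around example (illustrative): winPos == 5, offset == 9, cycSize == 100.
   Since offset > winPos, back == 4 and src starts at win + 5 + (100 - 9) == win + 96,
   i.e. the 4 oldest needed bytes at the end of the cyclic buffer are copied first;
   then src is rewound to (dest - offset), and the remaining (len - 4) bytes continue
   as an ordinary overlapped copy from the beginning of the window. */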
1566 | static | ||
1567 | Z7_FORCE_INLINE | ||
1568 | // Z7_ATTRIB_NO_VECTOR | ||
1569 | void CopyMatch(size_t offset, size_t len, | ||
1570 | Byte *win, size_t winPos, size_t rem, const size_t cycSize) | ||
1571 | { | ||
1572 | Byte *dest = win + winPos; | ||
1573 | const Byte *src; | ||
1574 | // STAT_INC(g_NumCopy) | ||
1575 | |||
1576 | if (offset > winPos) | ||
1577 | { | ||
1578 | size_t back = offset - winPos; | ||
1579 | // src = win + cycSize - back; | ||
1580 | // cycSize -= offset; | ||
1581 | STAT_INC(g_NumOver) | ||
1582 | src = dest + (cycSize - offset); | ||
1583 | // (src >= dest) here | ||
1584 | #ifdef Z7_STD_DEC_USE_AFTER_CYC_BUF | ||
1585 | if (back < len) | ||
1586 | { | ||
1587 | #else | ||
1588 | if (back < len + (COPY_CHUNK_SIZE - 1)) | ||
1589 | { | ||
1590 | if (back >= len) | ||
1591 | { | ||
1592 | Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE | ||
1593 | do | ||
1594 | *dest++ = *src++; | ||
1595 | while (--len); | ||
1596 | return; | ||
1597 | } | ||
1598 | #endif | ||
1599 | // back < len | ||
1600 | STAT_INC(g_NumOver2) | ||
1601 | len -= back; | ||
1602 | rem -= back; | ||
1603 | Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE | ||
1604 | do | ||
1605 | *dest++ = *src++; | ||
1606 | while (--back); | ||
1607 | src = dest - offset; | ||
1608 | // src = win; | ||
1609 | // we go to MAIN-COPY | ||
1610 | } | ||
1611 | } | ||
1612 | else | ||
1613 | src = dest - offset; | ||
1614 | |||
1615 | // len != 0 | ||
1616 | // do *dest++ = *src++; while (--len); return; | ||
1617 | |||
1618 | // --- MAIN COPY --- | ||
1619 | // if (src >= dest), then ((size_t)(src - dest) >= COPY_CHUNK_SIZE) | ||
1620 | // so we have at least COPY_CHUNK_SIZE space before overlap for writing. | ||
1621 | COPY_PREPARE | ||
1622 | |||
1623 | /* now (len == COPY_CHUNK_SIZE * x) | ||
1624 | so we can unroll for aligned copy */ | ||
1625 | { | ||
1626 | // const unsigned b0 = src[0]; | ||
1627 | // (COPY_OFFSET_MIN >= 4) | ||
1628 | |||
1629 | if (offset >= COPY_OFFSET_MIN) | ||
1630 | { | ||
1631 | COPY_CHUNKS | ||
1632 | // return; | ||
1633 | } | ||
1634 | else | ||
1635 | #if (COPY_OFFSET_MIN > 4) | ||
1636 | #if COPY_CHUNK_SIZE < 8 | ||
1637 | #error Stop_Compiling_Bad_COPY_CHUNK_SIZE | ||
1638 | #endif | ||
1639 | if (offset >= 4) | ||
1640 | { | ||
1641 | Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE | ||
1642 | do | ||
1643 | { | ||
1644 | COPY_CHUNK_4_2(dest, src) | ||
1645 | #if COPY_CHUNK_SIZE != 16 | ||
1646 | if (len == 8) break; | ||
1647 | #endif | ||
1648 | COPY_CHUNK_4_2(dest, src) | ||
1649 | } | ||
1650 | while (len -= 16); | ||
1651 | // return; | ||
1652 | } | ||
1653 | else | ||
1654 | #endif | ||
1655 | { | ||
1656 | // (offset < 4) | ||
1657 | const unsigned b0 = src[0]; | ||
1658 | if (offset < 2) | ||
1659 | { | ||
1660 | #if defined(Z7_ZSTD_DEC_USE_UNALIGNED_COPY) && (COPY_CHUNK_SIZE == 16) | ||
1661 | #if defined(MY_CPU_64BIT) | ||
1662 | { | ||
1663 | const UInt64 v64 = (UInt64)b0 * 0x0101010101010101; | ||
1664 | Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE | ||
1665 | do | ||
1666 | { | ||
1667 | ((UInt64 *)(void *)dest)[0] = v64; | ||
1668 | ((UInt64 *)(void *)dest)[1] = v64; | ||
1669 | dest += 16; | ||
1670 | } | ||
1671 | while (len -= 16); | ||
1672 | } | ||
1673 | #else | ||
1674 | { | ||
1675 | UInt32 v = b0; | ||
1676 | v |= v << 8; | ||
1677 | v |= v << 16; | ||
1678 | do | ||
1679 | { | ||
1680 | ((UInt32 *)(void *)dest)[0] = v; | ||
1681 | ((UInt32 *)(void *)dest)[1] = v; | ||
1682 | dest += 8; | ||
1683 | ((UInt32 *)(void *)dest)[0] = v; | ||
1684 | ((UInt32 *)(void *)dest)[1] = v; | ||
1685 | dest += 8; | ||
1686 | } | ||
1687 | while (len -= 16); | ||
1688 | } | ||
1689 | #endif | ||
1690 | #else | ||
1691 | do | ||
1692 | { | ||
1693 | dest[0] = (Byte)b0; | ||
1694 | dest[1] = (Byte)b0; | ||
1695 | dest += 2; | ||
1696 | dest[0] = (Byte)b0; | ||
1697 | dest[1] = (Byte)b0; | ||
1698 | dest += 2; | ||
1699 | } | ||
1700 | while (len -= 4); | ||
1701 | #endif | ||
1702 | } | ||
1703 | else if (offset == 2) | ||
1704 | { | ||
1705 | const Byte b1 = src[1]; | ||
1706 | { | ||
1707 | do | ||
1708 | { | ||
1709 | dest[0] = (Byte)b0; | ||
1710 | dest[1] = b1; | ||
1711 | dest += 2; | ||
1712 | } | ||
1713 | while (len -= 2); | ||
1714 | } | ||
1715 | } | ||
1716 | else // (offset == 3) | ||
1717 | { | ||
1718 | const Byte *lim = dest + len - 2; | ||
1719 | const Byte b1 = src[1]; | ||
1720 | const Byte b2 = src[2]; | ||
1721 | do | ||
1722 | { | ||
1723 | dest[0] = (Byte)b0; | ||
1724 | dest[1] = b1; | ||
1725 | dest[2] = b2; | ||
1726 | dest += 3; | ||
1727 | } | ||
1728 | while (dest < lim); | ||
1729 | lim++; // points to last byte that must be written | ||
1730 | if (dest <= lim) | ||
1731 | { | ||
1732 | *dest = (Byte)b0; | ||
1733 | if (dest != lim) | ||
1734 | dest[1] = b1; | ||
1735 | } | ||
1736 | } | ||
1737 | } | ||
1738 | } | ||
1739 | } | ||
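/* The small-offset branches above replicate a short byte pattern instead of doing
   chunked copies, because (src) and (dest) overlap too closely for chunk-wise stores.
   A minimal stand-alone sketch of the equivalent (but slower) byte-wise overlapped
   copy is shown here for illustration only; it is not used by the decoder: */
#if 0
static void CopyMatch_Reference(Byte *dest, size_t offset, size_t len)
{
  // (offset != 0 && len != 0); the already written bytes dest[-offset .. -1] are the source
  const Byte *src = dest - offset;
  do
    *dest++ = *src++; // overlapped copy: if (offset < len), the last (offset) bytes repeat
  while (--len);
}
#endif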
1740 | |||
1741 | |||
1742 | |||
1743 | #define UPDATE_TOTAL_OUT(p, size) \ | ||
1744 | { \ | ||
1745 | size_t _toc = (p)->totalOutCheck + (size); \ | ||
1746 | const size_t _ws = (p)->winSize; \ | ||
1747 | if (_toc >= _ws) _toc = _ws; \ | ||
1748 | (p)->totalOutCheck = _toc; \ | ||
1749 | } | ||
1750 | |||
1751 | |||
1752 | #if defined(MY_CPU_64BIT) && defined(MY_CPU_LE_UNALIGN) | ||
1753 | // we can disable it for debug: | ||
1754 | #define Z7_ZSTD_DEC_USE_64BIT_LOADS | ||
1755 | #endif | ||
1756 | // #define Z7_ZSTD_DEC_USE_64BIT_LOADS // for debug : slow in 32-bit | ||
1757 | |||
1758 | // SEQ_SRC_OFFSET: the number of bytes that (src) (seqSrc) was moved back from its original value. | ||
1759 | // we need (SEQ_SRC_OFFSET != 0) for optimized memory access | ||
1760 | #ifdef Z7_ZSTD_DEC_USE_64BIT_LOADS | ||
1761 | #define SEQ_SRC_OFFSET 7 | ||
1762 | #else | ||
1763 | #define SEQ_SRC_OFFSET 3 | ||
1764 | #endif | ||
1765 | #define SRC_PLUS_FOR_4BYTES(bitOffset) (SEQ_SRC_OFFSET - 3) + ((CBitCtr_signed)(bitOffset) >> 3) | ||
1766 | #define BIT_OFFSET_7BITS(bitOffset) ((unsigned)(bitOffset) & 7) | ||
1767 | /* | ||
1768 | if (BIT_OFFSET_DELTA_BITS == 0) : bitOffset == number_of_unprocessed_bits | ||
1769 | if (BIT_OFFSET_DELTA_BITS == 1) : bitOffset == number_of_unprocessed_bits - 1 | ||
1770 | and we can read 1 bit more in that mode : (8 * n + 1). | ||
1771 | */ | ||
1772 | // #define BIT_OFFSET_DELTA_BITS 0 | ||
1773 | #define BIT_OFFSET_DELTA_BITS 1 | ||
1774 | #if BIT_OFFSET_DELTA_BITS == 1 | ||
1775 | #define GET_SHIFT_FROM_BOFFS7(boff7) (7 ^ (boff7)) | ||
1776 | #else | ||
1777 | #define GET_SHIFT_FROM_BOFFS7(boff7) (8 - BIT_OFFSET_DELTA_BITS - (boff7)) | ||
1778 | #endif | ||
1779 | |||
1780 | #define UPDATE_BIT_OFFSET(bitOffset, numBits) \ | ||
1781 | (bitOffset) -= (CBitCtr)(numBits); | ||
1782 | |||
1783 | #define GET_SHIFT(bitOffset) GET_SHIFT_FROM_BOFFS7(BIT_OFFSET_7BITS(bitOffset)) | ||
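/* Worked example of the bit-stream addressing above (illustrative, assuming the
   64-bit-loads configuration, i.e. SEQ_SRC_OFFSET == 7 and BIT_OFFSET_DELTA_BITS == 1):
     bitOffset = 21 :
       SRC_PLUS_FOR_4BYTES(21) = (7 - 3) + (21 >> 3) = 4 + 2 = 6
       BIT_OFFSET_7BITS(21)    = 21 & 7 = 5
       GET_SHIFT(21)           = 7 ^ 5  = 2
   so the 4-byte fetch starts at (src + 6) and the value is shifted left by 2 before the
   top bits are consumed. With (BIT_OFFSET_DELTA_BITS == 0) the shift would be 8 - 0 - 5 = 3. */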
1784 | |||
1785 | |||
1786 | #if defined(Z7_ZSTD_DEC_USE_64BIT_LOADS) | ||
1787 | #if (NUM_OFFSET_SYMBOLS_MAX - BIT_OFFSET_DELTA_BITS < 32) | ||
1788 | /* if (NUM_OFFSET_SYMBOLS_MAX == 32 && BIT_OFFSET_DELTA_BITS == 1), | ||
1789 | we have depth 31 + 9 + 9 + 8 = 57 bits that can be read with a single read. */ | ||
1790 | #define Z7_ZSTD_DEC_USE_64BIT_PRELOAD_OF | ||
1791 | #endif | ||
1792 | #ifndef Z7_ZSTD_DEC_USE_64BIT_PRELOAD_OF | ||
1793 | #if (BIT_OFFSET_DELTA_BITS == 1) | ||
1794 | /* if (winLimit - winPos <= (kBlockSizeMax = (1 << 17))) | ||
1795 | { | ||
1796 | the case (16 bits of literal extra + 16 bits of match extra) is not possible | ||
1797 | in a correct stream. So the error will be detected for the (16 + 16) case. | ||
1798 | And the longest correct sequence after offset reading is (31 + 9 + 9 + 8 = 57 bits). | ||
1799 | So we can use just one 64-bit load here in that case. | ||
1800 | } | ||
1801 | */ | ||
1802 | #define Z7_ZSTD_DEC_USE_64BIT_PRELOAD_ML | ||
1803 | #endif | ||
1804 | #endif | ||
1805 | #endif | ||
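/* Rough bit budget behind the 64-bit preload choices above; the numbers repeat the
   comments and are shown only for illustration (the identifiers are not used elsewhere): */
#if 0
enum
{
  k_seq_bits_of_extra = 31, // max offset extra bits, if (NUM_OFFSET_SYMBOLS_MAX == 32)
  k_seq_bits_ll_fse   = 9,  // max accuracy of the literal-length FSE table
  k_seq_bits_ml_fse   = 9,  // max accuracy of the match-length FSE table
  k_seq_bits_of_fse   = 8,  // max accuracy of the offset FSE table
  k_seq_bits_total    = 31 + 9 + 9 + 8 // == 57 < 64, so a single 64-bit load is enough
};
#endif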
1806 | |||
1807 | |||
1808 | #if !defined(Z7_ZSTD_DEC_USE_64BIT_LOADS) || \ | ||
1809 | (!defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_OF) && \ | ||
1810 | !defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_ML)) | ||
1811 | // in : (0 < bits <= (24 or 25)): | ||
1812 | #define STREAM_READ_BITS(dest, bits) \ | ||
1813 | { \ | ||
1814 | GET32(dest, src + SRC_PLUS_FOR_4BYTES(bitOffset)) \ | ||
1815 | dest <<= GET_SHIFT(bitOffset); \ | ||
1816 | UPDATE_BIT_OFFSET(bitOffset, bits) \ | ||
1817 | dest >>= 32 - bits; \ | ||
1818 | } | ||
1819 | #endif | ||
1820 | |||
1821 | |||
1822 | #define FSE_Peek_1(table, state) table[state] | ||
1823 | |||
1824 | #define STATE_VAR(name) state_ ## name | ||
1825 | |||
1826 | // in : (0 <= accuracy <= (24 or 25)) | ||
1827 | #define FSE_INIT_STATE(name, cond) \ | ||
1828 | { \ | ||
1829 | UInt32 r; \ | ||
1830 | const unsigned bits = p->name ## _accuracy; \ | ||
1831 | GET32(r, src + SRC_PLUS_FOR_4BYTES(bitOffset)) \ | ||
1832 | r <<= GET_SHIFT(bitOffset); \ | ||
1833 | r >>= 1; \ | ||
1834 | r >>= 31 ^ bits; \ | ||
1835 | UPDATE_BIT_OFFSET(bitOffset, bits) \ | ||
1836 | cond \ | ||
1837 | STATE_VAR(name) = FSE_Peek_1(FSE_TABLE(name), r); \ | ||
1838 | /* STATE_VAR(name) = dest << 16; */ \ | ||
1839 | } | ||
1840 | |||
1841 | |||
1842 | #define FSE_Peek_Plus(name, r) \ | ||
1843 | STATE_VAR(name) = FSE_Peek_1(FSE_TABLE(name), \ | ||
1844 | GET_FSE_REC_STATE(STATE_VAR(name)) + r); | ||
1845 | |||
1846 | #define LZ_LOOP_ERROR_EXIT { return SZ_ERROR_DATA; } | ||
1847 | |||
1848 | #define BO_OVERFLOW_CHECK \ | ||
1849 | { if ((CBitCtr_signed)bitOffset < 0) LZ_LOOP_ERROR_EXIT } | ||
1850 | |||
1851 | |||
1852 | #ifdef Z7_ZSTD_DEC_USE_64BIT_LOADS | ||
1853 | |||
1854 | #define GET64(dest, p) { const Byte *ptr = p; dest = GetUi64(ptr); } | ||
1855 | |||
1856 | #define FSE_PRELOAD \ | ||
1857 | { \ | ||
1858 | GET64(v, src - 4 + SRC_PLUS_FOR_4BYTES(bitOffset)) \ | ||
1859 | v <<= GET_SHIFT(bitOffset); \ | ||
1860 | } | ||
1861 | |||
1862 | #define FSE_UPDATE_STATE_2(name, cond) \ | ||
1863 | { \ | ||
1864 | const unsigned bits = GET_FSE_REC_LEN(STATE_VAR(name)); \ | ||
1865 | UInt64 r = v; \ | ||
1866 | v <<= bits; \ | ||
1867 | r >>= 1; \ | ||
1868 | UPDATE_BIT_OFFSET(bitOffset, bits) \ | ||
1869 | cond \ | ||
1870 | r >>= 63 ^ bits; \ | ||
1871 | FSE_Peek_Plus(name, r); \ | ||
1872 | } | ||
1873 | |||
1874 | #define FSE_UPDATE_STATES \ | ||
1875 | FSE_UPDATE_STATE_2 (ll, {} ) \ | ||
1876 | FSE_UPDATE_STATE_2 (ml, {} ) \ | ||
1877 | FSE_UPDATE_STATE_2 (of, BO_OVERFLOW_CHECK) \ | ||
1878 | |||
1879 | #else // Z7_ZSTD_DEC_USE_64BIT_LOADS | ||
1880 | |||
1881 | // it supports 8 bits of accuracy for any code | ||
1882 | // it supports 9 bits of accuracy, if (BIT_OFFSET_DELTA_BITS == 1) | ||
1883 | #define FSE_UPDATE_STATE_0(name, cond) \ | ||
1884 | { \ | ||
1885 | UInt32 r; \ | ||
1886 | const unsigned bits = GET_FSE_REC_LEN(STATE_VAR(name)); \ | ||
1887 | GET16(r, src + 2 + SRC_PLUS_FOR_4BYTES(bitOffset)) \ | ||
1888 | r >>= (bitOffset & 7); \ | ||
1889 | r &= (1 << (8 + BIT_OFFSET_DELTA_BITS)) - 1; \ | ||
1890 | UPDATE_BIT_OFFSET(bitOffset, bits) \ | ||
1891 | cond \ | ||
1892 | r >>= (8 + BIT_OFFSET_DELTA_BITS) - bits; \ | ||
1893 | FSE_Peek_Plus(name, r); \ | ||
1894 | } | ||
1895 | |||
1896 | // for debug (slow): | ||
1897 | // #define Z7_ZSTD_DEC_USE_FSE_FUSION_FORCE | ||
1898 | #if BIT_OFFSET_DELTA_BITS == 0 || defined(Z7_ZSTD_DEC_USE_FSE_FUSION_FORCE) | ||
1899 | #define Z7_ZSTD_DEC_USE_FSE_FUSION | ||
1900 | #endif | ||
1901 | |||
1902 | #ifdef Z7_ZSTD_DEC_USE_FSE_FUSION | ||
1903 | #define FSE_UPDATE_STATE_1(name) \ | ||
1904 | { UInt32 rest2; \ | ||
1905 | { \ | ||
1906 | UInt32 r; \ | ||
1907 | unsigned bits; \ | ||
1908 | GET32(r, src + SRC_PLUS_FOR_4BYTES(bitOffset)) \ | ||
1909 | bits = GET_FSE_REC_LEN(STATE_VAR(name)); \ | ||
1910 | r <<= GET_SHIFT(bitOffset); \ | ||
1911 | rest2 = r << bits; \ | ||
1912 | r >>= 1; \ | ||
1913 | UPDATE_BIT_OFFSET(bitOffset, bits) \ | ||
1914 | r >>= 31 ^ bits; \ | ||
1915 | FSE_Peek_Plus(name, r); \ | ||
1916 | } | ||
1917 | |||
1918 | #define FSE_UPDATE_STATE_3(name) \ | ||
1919 | { \ | ||
1920 | const unsigned bits = GET_FSE_REC_LEN(STATE_VAR(name)); \ | ||
1921 | rest2 >>= 1; \ | ||
1922 | UPDATE_BIT_OFFSET(bitOffset, bits) \ | ||
1923 | rest2 >>= 31 ^ bits; \ | ||
1924 | FSE_Peek_Plus(name, rest2); \ | ||
1925 | }} | ||
1926 | |||
1927 | #define FSE_UPDATE_STATES \ | ||
1928 | FSE_UPDATE_STATE_1 (ll) \ | ||
1929 | FSE_UPDATE_STATE_3 (ml) \ | ||
1930 | FSE_UPDATE_STATE_0 (of, BO_OVERFLOW_CHECK) \ | ||
1931 | |||
1932 | #else // Z7_ZSTD_DEC_USE_64BIT_LOADS | ||
1933 | |||
1934 | #define FSE_UPDATE_STATES \ | ||
1935 | FSE_UPDATE_STATE_0 (ll, {} ) \ | ||
1936 | FSE_UPDATE_STATE_0 (ml, {} ) \ | ||
1937 | FSE_UPDATE_STATE_0 (of, BO_OVERFLOW_CHECK) \ | ||
1938 | |||
1939 | #endif // Z7_ZSTD_DEC_USE_FSE_FUSION | ||
1940 | #endif // Z7_ZSTD_DEC_USE_64BIT_LOADS | ||
1941 | |||
1942 | |||
1943 | |||
1944 | typedef struct | ||
1945 | { | ||
1946 | UInt32 numSeqs; | ||
1947 | UInt32 literalsLen; | ||
1948 | const Byte *literals; | ||
1949 | } | ||
1950 | CZstdDec1_Vars; | ||
1951 | |||
1952 | |||
1953 | // if (BIT_OFFSET_DELTA_BITS != 0), we need (BIT_OFFSET_DELTA_BYTES > 0) | ||
1954 | #define BIT_OFFSET_DELTA_BYTES BIT_OFFSET_DELTA_BITS | ||
1955 | |||
1956 | /* if (NUM_OFFSET_SYMBOLS_MAX == 32) | ||
1957 | max_seq_bit_length = (31) + 16 + 16 + 9 + 8 + 9 = 89 bits | ||
1958 | if defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_OF), we have the longest backward | ||
1959 | lookahead offset, and we read a UInt64 after reading literal_len. | ||
1960 | if (BIT_OFFSET_DELTA_BITS == 1 && NUM_OFFSET_SYMBOLS_MAX == 32) | ||
1961 | MAX_BACKWARD_DEPTH = 16 bytes | ||
1962 | */ | ||
1963 | #define MAX_BACKWARD_DEPTH \ | ||
1964 | ((NUM_OFFSET_SYMBOLS_MAX - 1 + 16 + 16 + 7) / 8 + 7 + BIT_OFFSET_DELTA_BYTES) | ||
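/* Worked evaluation of MAX_BACKWARD_DEPTH for the configuration described above
   (NUM_OFFSET_SYMBOLS_MAX == 32, BIT_OFFSET_DELTA_BYTES == 1), for illustration:
     (32 - 1 + 16 + 16 + 7) / 8 + 7 + 1  =  70 / 8 + 8  =  8 + 8  =  16 bytes,
   which matches the "MAX_BACKWARD_DEPTH = 16 bytes" note in the comment above. */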
1965 | |||
1966 | /* srcLen != 0 | ||
1967 | src == real_data_ptr - SEQ_SRC_OFFSET - BIT_OFFSET_DELTA_BYTES | ||
1968 | if defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_ML) then | ||
1969 | (winLimit - p->winPos <= (1 << 17)) is required | ||
1970 | */ | ||
1971 | static | ||
1972 | Z7_NO_INLINE | ||
1973 | // Z7_ATTRIB_NO_VECTOR | ||
1974 | SRes Decompress_Sequences(CZstdDec1 * const p, | ||
1975 | const Byte *src, const size_t srcLen, | ||
1976 | const size_t winLimit, | ||
1977 | const CZstdDec1_Vars * const vars) | ||
1978 | { | ||
1979 | #ifdef Z7_ZSTD_DEC_USE_BASES_LOCAL | ||
1980 | SEQ_EXTRA_TABLES(a_) | ||
1981 | #endif | ||
1982 | |||
1983 | // for debug: | ||
1984 | // #define Z7_ZSTD_DEC_USE_LOCAL_FSE_TABLES | ||
1985 | #ifdef Z7_ZSTD_DEC_USE_LOCAL_FSE_TABLES | ||
1986 | #define FSE_TABLE(n) fse. n | ||
1987 | const CZstdDecFseTables fse = p->fse; | ||
1988 | /* | ||
1989 | CZstdDecFseTables fse; | ||
1990 | #define COPY_FSE_TABLE(n) \ | ||
1991 | memcpy(fse. n, p->fse. n, (size_t)4 << p-> n ## _accuracy); | ||
1992 | COPY_FSE_TABLE(of) | ||
1993 | COPY_FSE_TABLE(ll) | ||
1994 | COPY_FSE_TABLE(ml) | ||
1995 | */ | ||
1996 | #else | ||
1997 | #define FSE_TABLE(n) (p->fse. n) | ||
1998 | #endif | ||
1999 | |||
2000 | #ifdef Z7_ZSTD_DEC_USE_BASES_LOCAL | ||
2001 | FILL_LOC_BASES_ALL | ||
2002 | #endif | ||
2003 | |||
2004 | { | ||
2005 | unsigned numSeqs = vars->numSeqs; | ||
2006 | const Byte *literals = vars->literals; | ||
2007 | ptrdiff_t literalsLen = (ptrdiff_t)vars->literalsLen; | ||
2008 | Byte * const win = p->win; | ||
2009 | size_t winPos = p->winPos; | ||
2010 | const size_t cycSize = p->cycSize; | ||
2011 | size_t totalOutCheck = p->totalOutCheck; | ||
2012 | const size_t winSize = p->winSize; | ||
2013 | size_t reps_0 = p->reps[0]; | ||
2014 | size_t reps_1 = p->reps[1]; | ||
2015 | size_t reps_2 = p->reps[2]; | ||
2016 | UInt32 STATE_VAR(ll), STATE_VAR(of), STATE_VAR(ml); | ||
2017 | CBitCtr bitOffset; | ||
2018 | |||
2019 | SET_bitOffset_TO_PAD (bitOffset, src + SEQ_SRC_OFFSET, srcLen + BIT_OFFSET_DELTA_BYTES) | ||
2020 | |||
2021 | bitOffset -= BIT_OFFSET_DELTA_BITS; | ||
2022 | |||
2023 | FSE_INIT_STATE(ll, {} ) | ||
2024 | FSE_INIT_STATE(of, {} ) | ||
2025 | FSE_INIT_STATE(ml, BO_OVERFLOW_CHECK) | ||
2026 | |||
2027 | for (;;) | ||
2028 | { | ||
2029 | size_t matchLen; | ||
2030 | #ifdef Z7_ZSTD_DEC_USE_64BIT_LOADS | ||
2031 | UInt64 v; | ||
2032 | #endif | ||
2033 | |||
2034 | #ifdef Z7_ZSTD_DEC_USE_64BIT_PRELOAD_OF | ||
2035 | FSE_PRELOAD | ||
2036 | #endif | ||
2037 | |||
2038 | // if (of_code == 0) | ||
2039 | if ((Byte)STATE_VAR(of) == 0) | ||
2040 | { | ||
2041 | if (GET_FSE_REC_SYM(STATE_VAR(ll)) == 0) | ||
2042 | { | ||
2043 | const size_t offset = reps_1; | ||
2044 | reps_1 = reps_0; | ||
2045 | reps_0 = offset; | ||
2046 | STAT_INC(g_Num_Rep1) | ||
2047 | } | ||
2048 | STAT_UPDATE(else g_Num_Rep0++;) | ||
2049 | } | ||
2050 | else | ||
2051 | { | ||
2052 | const unsigned of_code = (Byte)STATE_VAR(of); | ||
2053 | |||
2054 | #ifdef Z7_ZSTD_DEC_USE_64BIT_LOADS | ||
2055 | #if !defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_OF) | ||
2056 | FSE_PRELOAD | ||
2057 | #endif | ||
2058 | #else | ||
2059 | UInt32 v; | ||
2060 | { | ||
2061 | const Byte *src4 = src + SRC_PLUS_FOR_4BYTES(bitOffset); | ||
2062 | const unsigned skip = GET_SHIFT(bitOffset); | ||
2063 | GET32(v, src4) | ||
2064 | v <<= skip; | ||
2065 | v |= (UInt32)src4[-1] >> (8 - skip); | ||
2066 | } | ||
2067 | #endif | ||
2068 | |||
2069 | UPDATE_BIT_OFFSET(bitOffset, of_code) | ||
2070 | |||
2071 | if (of_code == 1) | ||
2072 | { | ||
2073 | // read 1 bit | ||
2074 | #if defined(Z7_MSC_VER_ORIGINAL) || defined(MY_CPU_X86_OR_AMD64) | ||
2075 | #ifdef Z7_ZSTD_DEC_USE_64BIT_LOADS | ||
2076 | #define CHECK_HIGH_BIT_64(a) ((Int64)(UInt64)(a) < 0) | ||
2077 | #else | ||
2078 | #define CHECK_HIGH_BIT_32(a) ((Int32)(UInt32)(a) < 0) | ||
2079 | #endif | ||
2080 | #else | ||
2081 | #ifdef Z7_ZSTD_DEC_USE_64BIT_LOADS | ||
2082 | #define CHECK_HIGH_BIT_64(a) ((UInt64)(a) & ((UInt64)1 << 63)) | ||
2083 | #else | ||
2084 | #define CHECK_HIGH_BIT_32(a) ((UInt32)(a) & ((UInt32)1 << 31)) | ||
2085 | #endif | ||
2086 | #endif | ||
2087 | |||
2088 | if | ||
2089 | #ifdef Z7_ZSTD_DEC_USE_64BIT_LOADS | ||
2090 | CHECK_HIGH_BIT_64 (((UInt64)GET_FSE_REC_SYM(STATE_VAR(ll)) - 1) ^ v) | ||
2091 | #else | ||
2092 | CHECK_HIGH_BIT_32 (((UInt32)GET_FSE_REC_SYM(STATE_VAR(ll)) - 1) ^ v) | ||
2093 | #endif | ||
2094 | { | ||
2095 | v <<= 1; | ||
2096 | { | ||
2097 | const size_t offset = reps_2; | ||
2098 | reps_2 = reps_1; | ||
2099 | reps_1 = reps_0; | ||
2100 | reps_0 = offset; | ||
2101 | STAT_INC(g_Num_Rep2) | ||
2102 | } | ||
2103 | } | ||
2104 | else | ||
2105 | { | ||
2106 | if (GET_FSE_REC_SYM(STATE_VAR(ll)) == 0) | ||
2107 | { | ||
2108 | // litLen == 0 && bit == 1 | ||
2109 | STAT_INC(g_Num_Rep3) | ||
2110 | v <<= 1; | ||
2111 | reps_2 = reps_1; | ||
2112 | reps_1 = reps_0; | ||
2113 | if (--reps_0 == 0) | ||
2114 | { | ||
2115 | // LZ_LOOP_ERROR_EXIT | ||
2116 | // original-zstd decoder : input is corrupted; force offset to 1 | ||
2117 | // reps_0 = 1; | ||
2118 | reps_0++; | ||
2119 | } | ||
2120 | } | ||
2121 | else | ||
2122 | { | ||
2123 | // litLen != 0 && bit == 0 | ||
2124 | v <<= 1; | ||
2125 | { | ||
2126 | const size_t offset = reps_1; | ||
2127 | reps_1 = reps_0; | ||
2128 | reps_0 = offset; | ||
2129 | STAT_INC(g_Num_Rep1) | ||
2130 | } | ||
2131 | } | ||
2132 | } | ||
2133 | } | ||
2134 | else | ||
2135 | { | ||
2136 | // (2 <= of_code) | ||
2137 | // if (of_code >= 32) LZ_LOOP_ERROR_EXIT // optional check | ||
2138 | // we don't allow (of_code >= 32) cases in other code | ||
2139 | reps_2 = reps_1; | ||
2140 | reps_1 = reps_0; | ||
2141 | reps_0 = ((size_t)1 << of_code) - 3 + (size_t) | ||
2142 | #ifdef Z7_ZSTD_DEC_USE_64BIT_LOADS | ||
2143 | (v >> (64 - of_code)); | ||
2144 | v <<= of_code; | ||
2145 | #else | ||
2146 | (v >> (32 - of_code)); | ||
2147 | #endif | ||
2148 | } | ||
2149 | } | ||
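/* Illustrative trace of the (2 <= of_code) branch above: with (of_code == 5) and
the 5 extra bits equal to 9, the decoded offset is
((size_t)1 << 5) - 3 + 9 = 32 - 3 + 9 = 38,
i.e. offset_value = (1 << of_code) + extra, minus 3 because offset values 1..3
are the repeat-offset codes handled by the branches above. */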
2150 | |||
2151 | #ifdef Z7_ZSTD_DEC_USE_64BIT_PRELOAD_ML | ||
2152 | FSE_PRELOAD | ||
2153 | #endif | ||
2154 | |||
2155 | matchLen = (size_t)GET_FSE_REC_SYM(STATE_VAR(ml)) | ||
2156 | #ifndef Z7_ZSTD_DEC_USE_ML_PLUS3 | ||
2157 | + MATCH_LEN_MIN | ||
2158 | #endif | ||
2159 | ; | ||
2160 | { | ||
2161 | { | ||
2162 | if (matchLen >= 32 + MATCH_LEN_MIN) // if (state_ml & 0x20) | ||
2163 | { | ||
2164 | const unsigned extra = BASES_TABLE(SEQ_ML_EXTRA) [(size_t)matchLen - MATCH_LEN_MIN]; | ||
2165 | matchLen = BASES_TABLE(SEQ_ML_BASES) [(size_t)matchLen - MATCH_LEN_MIN]; | ||
2166 | #if defined(Z7_ZSTD_DEC_USE_64BIT_LOADS) && \ | ||
2167 | (defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_ML) || \ | ||
2168 | defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_OF)) | ||
2169 | { | ||
2170 | UPDATE_BIT_OFFSET(bitOffset, extra) | ||
2171 | matchLen += (size_t)(v >> (64 - extra)); | ||
2172 | #if defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_OF) | ||
2173 | FSE_PRELOAD | ||
2174 | #else | ||
2175 | v <<= extra; | ||
2176 | #endif | ||
2177 | } | ||
2178 | #else | ||
2179 | { | ||
2180 | UInt32 v32; | ||
2181 | STREAM_READ_BITS(v32, extra) | ||
2182 | matchLen += v32; | ||
2183 | } | ||
2184 | #endif | ||
2185 | STAT_INC(g_Num_Match) | ||
2186 | } | ||
2187 | } | ||
2188 | } | ||
2189 | |||
2190 | #if defined(Z7_ZSTD_DEC_USE_64BIT_LOADS) && \ | ||
2191 | !defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_OF) && \ | ||
2192 | !defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_ML) | ||
2193 | FSE_PRELOAD | ||
2194 | #endif | ||
2195 | |||
2196 | { | ||
2197 | size_t litLen = GET_FSE_REC_SYM(STATE_VAR(ll)); | ||
2198 | if (litLen) | ||
2199 | { | ||
2200 | // if (STATE_VAR(ll) & 0x70) | ||
2201 | if (litLen >= 16) | ||
2202 | { | ||
2203 | const unsigned extra = BASES_TABLE(SEQ_LL_EXTRA) [litLen]; | ||
2204 | litLen = BASES_TABLE(SEQ_LL_BASES) [litLen]; | ||
2205 | #ifdef Z7_ZSTD_DEC_USE_64BIT_LOADS | ||
2206 | { | ||
2207 | UPDATE_BIT_OFFSET(bitOffset, extra) | ||
2208 | litLen += (size_t)(v >> (64 - extra)); | ||
2209 | #if defined(Z7_ZSTD_DEC_USE_64BIT_PRELOAD_OF) | ||
2210 | FSE_PRELOAD | ||
2211 | #else | ||
2212 | v <<= extra; | ||
2213 | #endif | ||
2214 | } | ||
2215 | #else | ||
2216 | { | ||
2217 | UInt32 v32; | ||
2218 | STREAM_READ_BITS(v32, extra) | ||
2219 | litLen += v32; | ||
2220 | } | ||
2221 | #endif | ||
2222 | STAT_INC(g_Num_LitsBig) | ||
2223 | } | ||
2224 | |||
2225 | if ((literalsLen -= (ptrdiff_t)litLen) < 0) | ||
2226 | LZ_LOOP_ERROR_EXIT | ||
2227 | totalOutCheck += litLen; | ||
2228 | { | ||
2229 | const size_t rem = winLimit - winPos; | ||
2230 | if (litLen > rem) | ||
2231 | LZ_LOOP_ERROR_EXIT | ||
2232 | { | ||
2233 | const Byte *literals_temp = literals; | ||
2234 | Byte *d = win + winPos; | ||
2235 | literals += litLen; | ||
2236 | winPos += litLen; | ||
2237 | CopyLiterals(d, literals_temp, litLen, rem); | ||
2238 | } | ||
2239 | } | ||
2240 | } | ||
2241 | STAT_UPDATE(else g_Num_Lit0++;) | ||
2242 | } | ||
2243 | |||
2244 | #define COPY_MATCH \ | ||
2245 | { if (reps_0 > winSize || reps_0 > totalOutCheck) LZ_LOOP_ERROR_EXIT \ | ||
2246 | totalOutCheck += matchLen; \ | ||
2247 | { const size_t rem = winLimit - winPos; \ | ||
2248 | if (matchLen > rem) LZ_LOOP_ERROR_EXIT \ | ||
2249 | { const size_t winPos_temp = winPos; \ | ||
2250 | winPos += matchLen; \ | ||
2251 | CopyMatch(reps_0, matchLen, win, winPos_temp, rem, cycSize); }}} | ||
2252 | |||
2253 | if (--numSeqs == 0) | ||
2254 | { | ||
2255 | COPY_MATCH | ||
2256 | break; | ||
2257 | } | ||
2258 | FSE_UPDATE_STATES | ||
2259 | COPY_MATCH | ||
2260 | } // for | ||
2261 | |||
2262 | if ((CBitCtr_signed)bitOffset != BIT_OFFSET_DELTA_BYTES * 8 - BIT_OFFSET_DELTA_BITS) | ||
2263 | return SZ_ERROR_DATA; | ||
2264 | |||
2265 | if (literalsLen) | ||
2266 | { | ||
2267 | const size_t rem = winLimit - winPos; | ||
2268 | if ((size_t)literalsLen > rem) | ||
2269 | return SZ_ERROR_DATA; | ||
2270 | { | ||
2271 | Byte *d = win + winPos; | ||
2272 | winPos += (size_t)literalsLen; | ||
2273 | totalOutCheck += (size_t)literalsLen; | ||
2274 | CopyLiterals | ||
2275 | // memcpy | ||
2276 | (d, literals, (size_t)literalsLen, rem); | ||
2277 | } | ||
2278 | } | ||
2279 | if (totalOutCheck >= winSize) | ||
2280 | totalOutCheck = winSize; | ||
2281 | p->totalOutCheck = totalOutCheck; | ||
2282 | p->winPos = winPos; | ||
2283 | p->reps[0] = (CZstdDecOffset)reps_0; | ||
2284 | p->reps[1] = (CZstdDecOffset)reps_1; | ||
2285 | p->reps[2] = (CZstdDecOffset)reps_2; | ||
2286 | } | ||
2287 | return SZ_OK; | ||
2288 | } | ||
2289 | |||
2290 | |||
2291 | // for debug: define to check that ZstdDec1_NeedTempBufferForInput() works correctly: | ||
2292 | // #define Z7_ZSTD_DEC_USE_CHECK_OF_NEED_TEMP // define it for debug only | ||
2293 | #ifdef Z7_ZSTD_DEC_USE_CHECK_OF_NEED_TEMP | ||
2294 | static unsigned g_numSeqs; | ||
2295 | #endif | ||
2296 | |||
2297 | |||
2298 | #define k_LitBlockType_Flag_RLE_or_Treeless 1 | ||
2299 | #define k_LitBlockType_Flag_Compressed 2 | ||
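/* The two flags above treat the 2-bit Literals_Block_Type field (bits 0..1 of the first
   byte of the literals section) as independent bits; per the Zstandard format this gives:
     0 = Raw        : no flag set
     1 = RLE        : k_LitBlockType_Flag_RLE_or_Treeless
     2 = Compressed : k_LitBlockType_Flag_Compressed
     3 = Treeless   : both flags (the previous Huffman table is reused) */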
2300 | |||
2301 | // outLimit : is a strong limit | ||
2302 | // outLimit <= ZstdDec1_GET_BLOCK_SIZE_LIMIT(p) | ||
2303 | // inSize != 0 | ||
2304 | static | ||
2305 | Z7_NO_INLINE | ||
2306 | SRes ZstdDec1_DecodeBlock(CZstdDec1 *p, | ||
2307 | const Byte *src, SizeT inSize, SizeT afterAvail, | ||
2308 | const size_t outLimit) | ||
2309 | { | ||
2310 | CZstdDec1_Vars vars; | ||
2311 | vars.literals = p->literalsBase; | ||
2312 | { | ||
2313 | const unsigned b0 = *src++; | ||
2314 | UInt32 numLits, compressedSize; | ||
2315 | const Byte *litStream; | ||
2316 | Byte *literalsDest; | ||
2317 | inSize--; | ||
2318 | |||
2319 | if ((b0 & k_LitBlockType_Flag_Compressed) == 0) | ||
2320 | { | ||
2321 | // we need at least one additional byte for (numSeqs). | ||
2322 | // so we check for that additional byte in conditions. | ||
2323 | numLits = b0 >> 3; | ||
2324 | if (b0 & 4) | ||
2325 | { | ||
2326 | UInt32 v; | ||
2327 | if (inSize < 1 + 1) // we need at least 1 byte here and 1 byte for (numSeqs). | ||
2328 | return SZ_ERROR_DATA; | ||
2329 | numLits >>= 1; | ||
2330 | v = GetUi16(src); | ||
2331 | src += 2; | ||
2332 | inSize -= 2; | ||
2333 | if ((b0 & 8) == 0) | ||
2334 | { | ||
2335 | src--; | ||
2336 | inSize++; | ||
2337 | v = (Byte)v; | ||
2338 | } | ||
2339 | numLits += v << 4; | ||
2340 | } | ||
2341 | compressedSize = 1; | ||
2342 | if ((b0 & k_LitBlockType_Flag_RLE_or_Treeless) == 0) | ||
2343 | compressedSize = numLits; | ||
2344 | } | ||
2345 | else if (inSize < 4) | ||
2346 | return SZ_ERROR_DATA; | ||
2347 | else | ||
2348 | { | ||
2349 | const unsigned mode4Streams = b0 & 0xc; | ||
2350 | const unsigned numBytes = (3 * mode4Streams + 32) >> 4; | ||
2351 | const unsigned numBits = 4 * numBytes - 2; | ||
2352 | const UInt32 mask = ((UInt32)16 << numBits) - 1; | ||
2353 | compressedSize = GetUi32(src); | ||
2354 | numLits = (( | ||
2355 | #ifdef MY_CPU_LE_UNALIGN | ||
2356 | GetUi32(src - 1) | ||
2357 | #else | ||
2358 | ((compressedSize << 8) + b0) | ||
2359 | #endif | ||
2360 | ) >> 4) & mask; | ||
2361 | src += numBytes; | ||
2362 | inSize -= numBytes; | ||
2363 | compressedSize >>= numBits; | ||
2364 | compressedSize &= mask; | ||
2365 | /* | ||
2366 | if (numLits != 0) printf("inSize = %7u num_lits=%7u compressed=%7u ratio = %u ratio2 = %u\n", | ||
2367 | i1, numLits, (unsigned)compressedSize * 1, (unsigned)compressedSize * 100 / numLits, | ||
2368 | (unsigned)numLits * 100 / (unsigned)inSize); | ||
2369 | } | ||
2370 | */ | ||
2371 | if (compressedSize == 0) | ||
2372 | return SZ_ERROR_DATA; // (compressedSize == 0) is not allowed | ||
2373 | } | ||
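/* Illustrative summary of the literals-section header parsing above (derived from the
   size-format bits of b0; the byte counts below do not include b0 itself):
   Raw / RLE literals:
     (b0 & 4) == 0               : 0 extra bytes, numLits uses  5 bits (b0 >> 3)
     (b0 & 4) != 0, (b0 & 8) == 0: 1 extra byte,  numLits uses 12 bits
     (b0 & 4) != 0, (b0 & 8) != 0: 2 extra bytes, numLits uses 20 bits
   Compressed / Treeless literals (mode4Streams = b0 & 0xc):
     mode4Streams == 0 or 4 : numBytes = 2, numLits and compressedSize use 10 bits each
     mode4Streams == 8      : numBytes = 3, 14 bits each
     mode4Streams == 12     : numBytes = 4, 18 bits each */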
2374 | |||
2375 | STAT_UPDATE(g_Num_Lits += numLits;) | ||
2376 | |||
2377 | vars.literalsLen = numLits; | ||
2378 | |||
2379 | if (compressedSize >= inSize) | ||
2380 | return SZ_ERROR_DATA; | ||
2381 | litStream = src; | ||
2382 | src += compressedSize; | ||
2383 | inSize -= compressedSize; | ||
2384 | // inSize != 0 | ||
2385 | { | ||
2386 | UInt32 numSeqs = *src++; | ||
2387 | inSize--; | ||
2388 | if (numSeqs > 127) | ||
2389 | { | ||
2390 | UInt32 b1; | ||
2391 | if (inSize == 0) | ||
2392 | return SZ_ERROR_DATA; | ||
2393 | numSeqs -= 128; | ||
2394 | b1 = *src++; | ||
2395 | inSize--; | ||
2396 | if (numSeqs == 127) | ||
2397 | { | ||
2398 | if (inSize == 0) | ||
2399 | return SZ_ERROR_DATA; | ||
2400 | numSeqs = (UInt32)(*src++) + 127; | ||
2401 | inSize--; | ||
2402 | } | ||
2403 | numSeqs = (numSeqs << 8) + b1; | ||
2404 | } | ||
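/* Illustrative examples of the sequences-count encoding parsed above:
     byte0 = 0x30                             : numSeqs = 0x30 = 48
     byte0 = 0x85, byte1 = 0x20               : numSeqs = ((0x85 - 128) << 8) + 0x20 = 1312
     byte0 = 0xff, byte1 = 0x34, byte2 = 0x02 : numSeqs = ((0x02 + 127) << 8) + 0x34 = 0x8134
   (the last form equals byte1 + (byte2 << 8) + 0x7f00, as in the Zstandard format) */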
2405 | if (numSeqs * MATCH_LEN_MIN + numLits > outLimit) | ||
2406 | return SZ_ERROR_DATA; | ||
2407 | vars.numSeqs = numSeqs; | ||
2408 | |||
2409 | STAT_UPDATE(g_NumSeqs_total += numSeqs;) | ||
2410 | /* | ||
2411 | #ifdef SHOW_STAT | ||
2412 | printf("\n %5u : %8u, %8u : %5u", (int)g_Num_Blocks_Compressed, (int)numSeqs, (int)g_NumSeqs_total, | ||
2413 | (int)g_NumSeqs_total / g_Num_Blocks_Compressed); | ||
2414 | #endif | ||
2415 | // printf("\nnumSeqs2 = %d", numSeqs); | ||
2416 | */ | ||
2417 | #ifdef Z7_ZSTD_DEC_USE_CHECK_OF_NEED_TEMP | ||
2418 | if (numSeqs != g_numSeqs) return SZ_ERROR_DATA; // for debug | ||
2419 | #endif | ||
2420 | if (numSeqs == 0) | ||
2421 | { | ||
2422 | if (inSize != 0) | ||
2423 | return SZ_ERROR_DATA; | ||
2424 | literalsDest = p->win + p->winPos; | ||
2425 | } | ||
2426 | else | ||
2427 | literalsDest = p->literalsBase; | ||
2428 | } | ||
2429 | |||
2430 | if ((b0 & k_LitBlockType_Flag_Compressed) == 0) | ||
2431 | { | ||
2432 | if (b0 & k_LitBlockType_Flag_RLE_or_Treeless) | ||
2433 | { | ||
2434 | memset(literalsDest, litStream[0], numLits); | ||
2435 | if (vars.numSeqs) | ||
2436 | { | ||
2437 | // literalsDest == p->literalsBase == vars.literals | ||
2438 | #if COPY_CHUNK_SIZE > 1 | ||
2439 | memset(p->literalsBase + numLits, 0, COPY_CHUNK_SIZE); | ||
2440 | #endif | ||
2441 | } | ||
2442 | } | ||
2443 | else | ||
2444 | { | ||
2445 | // unsigned y; | ||
2446 | // for (y = 0; y < 10000; y++) | ||
2447 | memcpy(literalsDest, litStream, numLits); | ||
2448 | if (vars.numSeqs) | ||
2449 | { | ||
2450 | /* we need up to (15 == COPY_CHUNK_SIZE - 1) bytes of space for optimized CopyLiterals(). | ||
2451 | If we have additional space in the input stream after the literals stream, | ||
2452 | we use a direct copy of the raw literals in the input stream */ | ||
2453 | if ((size_t)(src + inSize - litStream) - numLits + afterAvail >= (COPY_CHUNK_SIZE - 1)) | ||
2454 | vars.literals = litStream; | ||
2455 | else | ||
2456 | { | ||
2457 | // literalsDest == p->literalsBase == vars.literals | ||
2458 | #if COPY_CHUNK_SIZE > 1 | ||
2459 | /* CopyLiterals(): | ||
2460 | 1) we don't want to read non-initialized data | ||
2461 | 2) we will copy only zero bytes after the literals buffer */ | ||
2462 | memset(p->literalsBase + numLits, 0, COPY_CHUNK_SIZE); | ||
2463 | #endif | ||
2464 | } | ||
2465 | } | ||
2466 | } | ||
2467 | } | ||
2468 | else | ||
2469 | { | ||
2470 | CInBufPair hufStream; | ||
2471 | hufStream.ptr = litStream; | ||
2472 | hufStream.len = compressedSize; | ||
2473 | |||
2474 | if ((b0 & k_LitBlockType_Flag_RLE_or_Treeless) == 0) | ||
2475 | { | ||
2476 | // unsigned y = 100; CInBufPair hs2 = hufStream; do { hufStream = hs2; | ||
2477 | RINOK(Huf_DecodeTable(&p->huf, &hufStream)) | ||
2478 | p->litHuf_wasSet = True; | ||
2479 | // } while (--y); | ||
2480 | } | ||
2481 | else if (!p->litHuf_wasSet) | ||
2482 | return SZ_ERROR_DATA; | ||
2483 | |||
2484 | { | ||
2485 | // int yyy; for (yyy = 0; yyy < 34; yyy++) { | ||
2486 | SRes sres; | ||
2487 | if ((b0 & 0xc) == 0) // mode4Streams | ||
2488 | sres = Huf_Decompress_1stream((const Byte *)(const void *)p->huf.table64, | ||
2489 | hufStream.ptr - HUF_SRC_OFFSET, hufStream.len, literalsDest, numLits); | ||
2490 | else | ||
2491 | { | ||
2492 | // 6 bytes for the jump table + 4x1 bytes of end padding | ||
2493 | if (hufStream.len < 6 + 4) | ||
2494 | return SZ_ERROR_DATA; | ||
2495 | // the condition from original-zstd decoder: | ||
2496 | #define Z7_ZSTD_MIN_LITERALS_FOR_4_STREAMS 6 | ||
2497 | if (numLits < Z7_ZSTD_MIN_LITERALS_FOR_4_STREAMS) | ||
2498 | return SZ_ERROR_DATA; | ||
2499 | sres = Huf_Decompress_4stream((const Byte *)(const void *)p->huf.table64, | ||
2500 | hufStream.ptr + (6 - HUF_SRC_OFFSET), hufStream.len, literalsDest, numLits); | ||
2501 | } | ||
2502 | RINOK(sres) | ||
2503 | // } | ||
2504 | } | ||
2505 | } | ||
2506 | |||
2507 | if (vars.numSeqs == 0) | ||
2508 | { | ||
2509 | p->winPos += numLits; | ||
2510 | return SZ_OK; | ||
2511 | } | ||
2512 | } | ||
2513 | { | ||
2514 | CInBufPair in; | ||
2515 | unsigned mode; | ||
2516 | unsigned seqMode; | ||
2517 | |||
2518 | in.ptr = src; | ||
2519 | in.len = inSize; | ||
2520 | if (in.len == 0) | ||
2521 | return SZ_ERROR_DATA; | ||
2522 | in.len--; | ||
2523 | mode = *in.ptr++; | ||
2524 | if (mode & 3) // Reserved bits | ||
2525 | return SZ_ERROR_DATA; | ||
2526 | |||
2527 | seqMode = (mode >> 6); | ||
2528 | if (seqMode == k_SeqMode_Repeat) | ||
2529 | { if (!IS_SEQ_TABLES_WERE_SET(p)) return SZ_ERROR_DATA; } | ||
2530 | else RINOK(FSE_Decode_SeqTable( | ||
2531 | p->fse.ll, | ||
2532 | &in, | ||
2533 | 6, // predefAccuracy | ||
2534 | &p->ll_accuracy, | ||
2535 | NUM_LL_SYMBOLS, | ||
2536 | k_PredefRecords_LL, | ||
2537 | seqMode)) | ||
2538 | |||
2539 | seqMode = (mode >> 4) & 3; | ||
2540 | if (seqMode == k_SeqMode_Repeat) | ||
2541 | { if (!IS_SEQ_TABLES_WERE_SET(p)) return SZ_ERROR_DATA; } | ||
2542 | else RINOK(FSE_Decode_SeqTable( | ||
2543 | p->fse.of, | ||
2544 | &in, | ||
2545 | 5, // predefAccuracy | ||
2546 | &p->of_accuracy, | ||
2547 | NUM_OFFSET_SYMBOLS_MAX, | ||
2548 | k_PredefRecords_OF, | ||
2549 | seqMode)) | ||
2550 | |||
2551 | seqMode = (mode >> 2) & 3; | ||
2552 | if (seqMode == k_SeqMode_Repeat) | ||
2553 | { if (!IS_SEQ_TABLES_WERE_SET(p)) return SZ_ERROR_DATA; } | ||
2554 | else | ||
2555 | { | ||
2556 | RINOK(FSE_Decode_SeqTable( | ||
2557 | p->fse.ml, | ||
2558 | &in, | ||
2559 | 6, // predefAccuracy | ||
2560 | &p->ml_accuracy, | ||
2561 | NUM_ML_SYMBOLS, | ||
2562 | k_PredefRecords_ML, | ||
2563 | seqMode)) | ||
2564 | /* | ||
2565 | #if defined(Z7_ZSTD_DEC_USE_ML_PLUS3) | ||
2566 | // { unsigned y = 1 << 10; do | ||
2567 | { | ||
2568 | const unsigned accuracy = p->ml_accuracy; | ||
2569 | if (accuracy == 0) | ||
2570 | p->fse.ml[0] += 3; | ||
2571 | else | ||
2572 | #ifdef MY_CPU_64BIT | ||
2573 | { | ||
2574 | // alignment (UInt64 _pad_Alignment) in fse.ml is required for that code | ||
2575 | UInt64 *table = (UInt64 *)(void *)p->fse.ml; | ||
2576 | const UInt64 *end = (const UInt64 *)(const void *) | ||
2577 | ((const Byte *)(const void *)table + ((size_t)sizeof(CFseRecord) << accuracy)); | ||
2578 | do | ||
2579 | { | ||
2580 | table[0] += ((UInt64)MATCH_LEN_MIN << 32) + MATCH_LEN_MIN; | ||
2581 | table[1] += ((UInt64)MATCH_LEN_MIN << 32) + MATCH_LEN_MIN; | ||
2582 | table += 2; | ||
2583 | } | ||
2584 | while (table != end); | ||
2585 | } | ||
2586 | #else | ||
2587 | { | ||
2588 | UInt32 *table = p->fse.ml; | ||
2589 | const UInt32 *end = (const UInt32 *)(const void *) | ||
2590 | ((const Byte *)(const void *)table + ((size_t)sizeof(CFseRecord) << accuracy)); | ||
2591 | do | ||
2592 | { | ||
2593 | table[0] += MATCH_LEN_MIN; | ||
2594 | table[1] += MATCH_LEN_MIN; | ||
2595 | table += 2; | ||
2596 | table[0] += MATCH_LEN_MIN; | ||
2597 | table[1] += MATCH_LEN_MIN; | ||
2598 | table += 2; | ||
2599 | } | ||
2600 | while (table != end); | ||
2601 | } | ||
2602 | #endif | ||
2603 | } | ||
2604 | // while (--y); } | ||
2605 | #endif | ||
2606 | */ | ||
2607 | } | ||
2608 | |||
2609 | // p->seqTables_wereSet = True; | ||
2610 | if (in.len == 0) | ||
2611 | return SZ_ERROR_DATA; | ||
2612 | return Decompress_Sequences(p, | ||
2613 | in.ptr - SEQ_SRC_OFFSET - BIT_OFFSET_DELTA_BYTES, in.len, | ||
2614 | p->winPos + outLimit, &vars); | ||
2615 | } | ||
2616 | } | ||
2617 | |||
2618 | |||
2619 | |||
2620 | |||
2621 | // inSize != 0 | ||
2622 | // it must behave similarly to ZstdDec1_DecodeBlock() | ||
2623 | static size_t ZstdDec1_NeedTempBufferForInput( | ||
2624 | const SizeT beforeSize, const Byte * const src, const SizeT inSize) | ||
2625 | { | ||
2626 | unsigned b0; | ||
2627 | UInt32 pos; | ||
2628 | |||
2629 | #ifdef Z7_ZSTD_DEC_USE_CHECK_OF_NEED_TEMP | ||
2630 | g_numSeqs = 1 << 24; | ||
2631 | #else | ||
2632 | // we have at least 3 bytes before seq data: litBlockType, numSeqs, seqMode | ||
2633 | #define MIN_BLOCK_LZ_HEADERS_SIZE 3 | ||
2634 | if (beforeSize >= MAX_BACKWARD_DEPTH - MIN_BLOCK_LZ_HEADERS_SIZE) | ||
2635 | return 0; | ||
2636 | #endif | ||
2637 | |||
2638 | b0 = src[0]; | ||
2639 | |||
2640 | if ((b0 & k_LitBlockType_Flag_Compressed) == 0) | ||
2641 | { | ||
2642 | UInt32 numLits = b0 >> 3; | ||
2643 | pos = 1; | ||
2644 | if (b0 & 4) | ||
2645 | { | ||
2646 | UInt32 v; | ||
2647 | if (inSize < 3) | ||
2648 | return 0; | ||
2649 | numLits >>= 1; | ||
2650 | v = GetUi16(src + 1); | ||
2651 | pos = 3; | ||
2652 | if ((b0 & 8) == 0) | ||
2653 | { | ||
2654 | pos = 2; | ||
2655 | v = (Byte)v; | ||
2656 | } | ||
2657 | numLits += v << 4; | ||
2658 | } | ||
2659 | if (b0 & k_LitBlockType_Flag_RLE_or_Treeless) | ||
2660 | numLits = 1; | ||
2661 | pos += numLits; | ||
2662 | } | ||
2663 | else if (inSize < 5) | ||
2664 | return 0; | ||
2665 | else | ||
2666 | { | ||
2667 | const unsigned mode4Streams = b0 & 0xc; | ||
2668 | const unsigned numBytes = (3 * mode4Streams + 48) >> 4; | ||
2669 | const unsigned numBits = 4 * numBytes - 6; | ||
2670 | UInt32 cs = GetUi32(src + 1); | ||
2671 | cs >>= numBits; | ||
2672 | cs &= ((UInt32)16 << numBits) - 1; | ||
2673 | if (cs == 0) | ||
2674 | return 0; | ||
2675 | pos = numBytes + cs; | ||
2676 | } | ||
2677 | |||
2678 | if (pos >= inSize) | ||
2679 | return 0; | ||
2680 | { | ||
2681 | UInt32 numSeqs = src[pos++]; | ||
2682 | if (numSeqs > 127) | ||
2683 | { | ||
2684 | UInt32 b1; | ||
2685 | if (pos >= inSize) | ||
2686 | return 0; | ||
2687 | numSeqs -= 128; | ||
2688 | b1 = src[pos++]; | ||
2689 | if (numSeqs == 127) | ||
2690 | { | ||
2691 | if (pos >= inSize) | ||
2692 | return 0; | ||
2693 | numSeqs = (UInt32)(src[pos++]) + 127; | ||
2694 | } | ||
2695 | numSeqs = (numSeqs << 8) + b1; | ||
2696 | } | ||
2697 | #ifdef Z7_ZSTD_DEC_USE_CHECK_OF_NEED_TEMP | ||
2698 | g_numSeqs = numSeqs; // for debug | ||
2699 | #endif | ||
2700 | if (numSeqs == 0) | ||
2701 | return 0; | ||
2702 | } | ||
2703 | /* | ||
2704 | if (pos >= inSize) | ||
2705 | return 0; | ||
2706 | pos++; | ||
2707 | */ | ||
2708 | // we will have one additional byte for seqMode: | ||
2709 | if (beforeSize + pos >= MAX_BACKWARD_DEPTH - 1) | ||
2710 | return 0; | ||
2711 | return 1; | ||
2712 | } | ||
2713 | |||
2714 | |||
2715 | |||
2716 | // ---------- ZSTD FRAME ---------- | ||
2717 | |||
2718 | #define kBlockType_Raw 0 | ||
2719 | #define kBlockType_RLE 1 | ||
2720 | #define kBlockType_Compressed 2 | ||
2721 | #define kBlockType_Reserved 3 | ||
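/* Per the Zstandard format, every block starts with a 3-byte Block_Header:
     bit  0     : Last_Block flag
     bits 1..2  : Block_Type (one of the kBlockType_* values above)
     bits 3..23 : Block_Size (hence the 0x1fffff mask used below)
   ZstdDec_UpdateState() parses exactly this layout from p->temp. */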
2722 | |||
2723 | typedef enum | ||
2724 | { | ||
2725 | // begin: states that require 4 bytes: | ||
2726 | ZSTD2_STATE_SIGNATURE, | ||
2727 | ZSTD2_STATE_HASH, | ||
2728 | ZSTD2_STATE_SKIP_HEADER, | ||
2729 | // end of states that require 4 bytes | ||
2730 | |||
2731 | ZSTD2_STATE_SKIP_DATA, | ||
2732 | ZSTD2_STATE_FRAME_HEADER, | ||
2733 | ZSTD2_STATE_AFTER_HEADER, | ||
2734 | ZSTD2_STATE_BLOCK, | ||
2735 | ZSTD2_STATE_DATA, | ||
2736 | ZSTD2_STATE_FINISHED | ||
2737 | } EZstd2State; | ||
2738 | |||
2739 | |||
2740 | struct CZstdDec | ||
2741 | { | ||
2742 | EZstd2State frameState; | ||
2743 | unsigned tempSize; | ||
2744 | |||
2745 | Byte temp[14]; // 14 is required | ||
2746 | |||
2747 | Byte descriptor; | ||
2748 | Byte windowDescriptor; | ||
2749 | Byte isLastBlock; | ||
2750 | Byte blockType; | ||
2751 | Byte isErrorState; | ||
2752 | Byte hashError; | ||
2753 | Byte disableHash; | ||
2754 | Byte isCyclicMode; | ||
2755 | |||
2756 | UInt32 blockSize; | ||
2757 | UInt32 dictionaryId; | ||
2758 | UInt32 curBlockUnpackRem; // for compressed blocks only | ||
2759 | UInt32 inTempPos; | ||
2760 | |||
2761 | UInt64 contentSize; | ||
2762 | UInt64 contentProcessed; | ||
2763 | CXxh64State xxh64; | ||
2764 | |||
2765 | Byte *inTemp; | ||
2766 | SizeT winBufSize_Allocated; | ||
2767 | Byte *win_Base; | ||
2768 | |||
2769 | ISzAllocPtr alloc_Small; | ||
2770 | ISzAllocPtr alloc_Big; | ||
2771 | |||
2772 | CZstdDec1 decoder; | ||
2773 | }; | ||
2774 | |||
2775 | #define ZstdDec_GET_UNPROCESSED_XXH64_SIZE(p) \ | ||
2776 | ((unsigned)(p)->contentProcessed & (Z7_XXH64_BLOCK_SIZE - 1)) | ||
2777 | |||
2778 | #define ZSTD_DEC_IS_LAST_BLOCK(p) ((p)->isLastBlock) | ||
2779 | |||
2780 | |||
2781 | static void ZstdDec_FreeWindow(CZstdDec * const p) | ||
2782 | { | ||
2783 | if (p->win_Base) | ||
2784 | { | ||
2785 | ISzAlloc_Free(p->alloc_Big, p->win_Base); | ||
2786 | p->win_Base = NULL; | ||
2787 | // p->decoder.win = NULL; | ||
2788 | p->winBufSize_Allocated = 0; | ||
2789 | } | ||
2790 | } | ||
2791 | |||
2792 | |||
2793 | CZstdDecHandle ZstdDec_Create(ISzAllocPtr alloc_Small, ISzAllocPtr alloc_Big) | ||
2794 | { | ||
2795 | CZstdDec *p = (CZstdDec *)ISzAlloc_Alloc(alloc_Small, sizeof(CZstdDec)); | ||
2796 | if (!p) | ||
2797 | return NULL; | ||
2798 | p->alloc_Small = alloc_Small; | ||
2799 | p->alloc_Big = alloc_Big; | ||
2800 | // ZstdDec_CONSTRUCT(p) | ||
2801 | p->inTemp = NULL; | ||
2802 | p->win_Base = NULL; | ||
2803 | p->winBufSize_Allocated = 0; | ||
2804 | p->disableHash = False; | ||
2805 | ZstdDec1_Construct(&p->decoder); | ||
2806 | return p; | ||
2807 | } | ||
2808 | |||
2809 | void ZstdDec_Destroy(CZstdDecHandle p) | ||
2810 | { | ||
2811 | #ifdef SHOW_STAT | ||
2812 | #define PRINT_STAT1(name, v) \ | ||
2813 | printf("\n%25s = %9u", name, v); | ||
2814 | PRINT_STAT1("g_Num_Blocks_Compressed", g_Num_Blocks_Compressed) | ||
2815 | PRINT_STAT1("g_Num_Blocks_memcpy", g_Num_Blocks_memcpy) | ||
2816 | PRINT_STAT1("g_Num_Wrap_memmove_Num", g_Num_Wrap_memmove_Num) | ||
2817 | PRINT_STAT1("g_Num_Wrap_memmove_Bytes", g_Num_Wrap_memmove_Bytes) | ||
2818 | if (g_Num_Blocks_Compressed) | ||
2819 | { | ||
2820 | #define PRINT_STAT(name, v) \ | ||
2821 | printf("\n%17s = %9u, per_block = %8u", name, v, v / g_Num_Blocks_Compressed); | ||
2822 | PRINT_STAT("g_NumSeqs", g_NumSeqs_total) | ||
2823 | // PRINT_STAT("g_NumCopy", g_NumCopy) | ||
2824 | PRINT_STAT("g_NumOver", g_NumOver) | ||
2825 | PRINT_STAT("g_NumOver2", g_NumOver2) | ||
2826 | PRINT_STAT("g_Num_Match", g_Num_Match) | ||
2827 | PRINT_STAT("g_Num_Lits", g_Num_Lits) | ||
2828 | PRINT_STAT("g_Num_LitsBig", g_Num_LitsBig) | ||
2829 | PRINT_STAT("g_Num_Lit0", g_Num_Lit0) | ||
2830 | PRINT_STAT("g_Num_Rep_0", g_Num_Rep0) | ||
2831 | PRINT_STAT("g_Num_Rep_1", g_Num_Rep1) | ||
2832 | PRINT_STAT("g_Num_Rep_2", g_Num_Rep2) | ||
2833 | PRINT_STAT("g_Num_Rep_3", g_Num_Rep3) | ||
2834 | PRINT_STAT("g_Num_Threshold_0", g_Num_Threshold_0) | ||
2835 | PRINT_STAT("g_Num_Threshold_1", g_Num_Threshold_1) | ||
2836 | PRINT_STAT("g_Num_Threshold_0sum", g_Num_Threshold_0sum) | ||
2837 | PRINT_STAT("g_Num_Threshold_1sum", g_Num_Threshold_1sum) | ||
2838 | } | ||
2839 | printf("\n"); | ||
2840 | #endif | ||
2841 | |||
2842 | ISzAlloc_Free(p->alloc_Small, p->decoder.literalsBase); | ||
2843 | // p->->decoder.literalsBase = NULL; | ||
2844 | ISzAlloc_Free(p->alloc_Small, p->inTemp); | ||
2845 | // p->inTemp = NULL; | ||
2846 | ZstdDec_FreeWindow(p); | ||
2847 | ISzAlloc_Free(p->alloc_Small, p); | ||
2848 | } | ||
2849 | |||
2850 | |||
2851 | |||
2852 | #define kTempBuffer_PreSize (1u << 6) | ||
2853 | #if kTempBuffer_PreSize < MAX_BACKWARD_DEPTH | ||
2854 | #error Stop_Compiling_Bad_kTempBuffer_PreSize | ||
2855 | #endif | ||
2856 | |||
2857 | static SRes ZstdDec_AllocateMisc(CZstdDec *p) | ||
2858 | { | ||
2859 | #define k_Lit_AfterAvail (1u << 6) | ||
2860 | #if k_Lit_AfterAvail < (COPY_CHUNK_SIZE - 1) | ||
2861 | #error Stop_Compiling_Bad_k_Lit_AfterAvail | ||
2862 | #endif | ||
2863 | // return ZstdDec1_Allocate(&p->decoder, p->alloc_Small); | ||
2864 | if (!p->decoder.literalsBase) | ||
2865 | { | ||
2866 | p->decoder.literalsBase = (Byte *)ISzAlloc_Alloc(p->alloc_Small, | ||
2867 | kBlockSizeMax + k_Lit_AfterAvail); | ||
2868 | if (!p->decoder.literalsBase) | ||
2869 | return SZ_ERROR_MEM; | ||
2870 | } | ||
2871 | if (!p->inTemp) | ||
2872 | { | ||
2873 | // we need k_Lit_AfterAvail here for overread from the raw literals stream | ||
2874 | p->inTemp = (Byte *)ISzAlloc_Alloc(p->alloc_Small, | ||
2875 | kBlockSizeMax + kTempBuffer_PreSize + k_Lit_AfterAvail); | ||
2876 | if (!p->inTemp) | ||
2877 | return SZ_ERROR_MEM; | ||
2878 | } | ||
2879 | return SZ_OK; | ||
2880 | } | ||
2881 | |||
2882 | |||
2883 | static void ZstdDec_Init_ForNewFrame(CZstdDec *p) | ||
2884 | { | ||
2885 | p->frameState = ZSTD2_STATE_SIGNATURE; | ||
2886 | p->tempSize = 0; | ||
2887 | |||
2888 | p->isErrorState = False; | ||
2889 | p->hashError = False; | ||
2890 | p->isCyclicMode = False; | ||
2891 | p->contentProcessed = 0; | ||
2892 | Xxh64State_Init(&p->xxh64); | ||
2893 | ZstdDec1_Init(&p->decoder); | ||
2894 | } | ||
2895 | |||
2896 | |||
2897 | void ZstdDec_Init(CZstdDec *p) | ||
2898 | { | ||
2899 | ZstdDec_Init_ForNewFrame(p); | ||
2900 | p->decoder.winPos = 0; | ||
2901 | memset(p->temp, 0, sizeof(p->temp)); | ||
2902 | } | ||
2903 | |||
2904 | |||
2905 | #define DESCRIPTOR_Get_DictionaryId_Flag(d) ((d) & 3) | ||
2906 | #define DESCRIPTOR_FLAG_CHECKSUM (1 << 2) | ||
2907 | #define DESCRIPTOR_FLAG_RESERVED (1 << 3) | ||
2908 | // #define DESCRIPTOR_FLAG_UNUSED (1 << 4) | ||
2909 | #define DESCRIPTOR_FLAG_SINGLE (1 << 5) | ||
2910 | #define DESCRIPTOR_Get_ContentSize_Flag3(d) ((d) >> 5) | ||
2911 | #define DESCRIPTOR_Is_ContentSize_Defined(d) (((d) & 0xe0) != 0) | ||
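/* Frame_Header_Descriptor bit layout assumed by the macros above
   (per the Zstandard format specification):
     bits 0..1 : Dictionary_ID_flag (0, 1, 2 or 4 bytes of dictionary id follow)
     bit  2    : Content_Checksum_flag
     bit  3    : Reserved (must be 0)
     bit  4    : Unused
     bit  5    : Single_Segment_flag (no Window_Descriptor byte is present)
     bits 6..7 : Frame_Content_Size_flag
   DESCRIPTOR_Get_ContentSize_Flag3() keeps bit 5 together with bits 6..7, because the
   Single_Segment flag changes the size of the Frame_Content_Size field. */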
2912 | |||
2913 | |||
2914 | static EZstd2State ZstdDec_UpdateState(CZstdDec * const p, const Byte b, CZstdDecInfo * const info) | ||
2915 | { | ||
2916 | unsigned tempSize = p->tempSize; | ||
2917 | p->temp[tempSize++] = b; | ||
2918 | p->tempSize = tempSize; | ||
2919 | |||
2920 | if (p->frameState == ZSTD2_STATE_BLOCK) | ||
2921 | { | ||
2922 | if (tempSize < 3) | ||
2923 | return ZSTD2_STATE_BLOCK; | ||
2924 | { | ||
2925 | UInt32 b0 = GetUi32(p->temp); | ||
2926 | const unsigned type = ((unsigned)b0 >> 1) & 3; | ||
2927 | if (type == kBlockType_RLE && tempSize == 3) | ||
2928 | return ZSTD2_STATE_BLOCK; | ||
2929 | // info->num_Blocks_forType[type]++; | ||
2930 | info->num_Blocks++; | ||
2931 | if (type == kBlockType_Reserved) | ||
2932 | { | ||
2933 | p->isErrorState = True; // SZ_ERROR_UNSUPPORTED | ||
2934 | return ZSTD2_STATE_BLOCK; | ||
2935 | } | ||
2936 | p->blockType = (Byte)type; | ||
2937 | p->isLastBlock = (Byte)(b0 & 1); | ||
2938 | p->inTempPos = 0; | ||
2939 | p->tempSize = 0; | ||
2940 | b0 >>= 3; | ||
2941 | b0 &= 0x1fffff; | ||
2942 | // info->num_BlockBytes_forType[type] += b0; | ||
2943 | if (b0 == 0) | ||
2944 | { | ||
2945 | // empty RAW/RLE blocks are allowed in original-zstd decoder | ||
2946 | if (type == kBlockType_Compressed) | ||
2947 | { | ||
2948 | p->isErrorState = True; | ||
2949 | return ZSTD2_STATE_BLOCK; | ||
2950 | } | ||
2951 | if (!ZSTD_DEC_IS_LAST_BLOCK(p)) | ||
2952 | return ZSTD2_STATE_BLOCK; | ||
2953 | if (p->descriptor & DESCRIPTOR_FLAG_CHECKSUM) | ||
2954 | return ZSTD2_STATE_HASH; | ||
2955 | return ZSTD2_STATE_FINISHED; | ||
2956 | } | ||
2957 | p->blockSize = b0; | ||
2958 | { | ||
2959 | UInt32 blockLim = ZstdDec1_GET_BLOCK_SIZE_LIMIT(&p->decoder); | ||
2960 | // compressed and uncompressed block sizes cannot be larger than min(kBlockSizeMax, window_size) | ||
2961 | if (b0 > blockLim) | ||
2962 | { | ||
2963 | p->isErrorState = True; // SZ_ERROR_UNSUPPORTED; | ||
2964 | return ZSTD2_STATE_BLOCK; | ||
2965 | } | ||
2966 | if (DESCRIPTOR_Is_ContentSize_Defined(p->descriptor)) | ||
2967 | { | ||
2968 | const UInt64 rem = p->contentSize - p->contentProcessed; | ||
2969 | if (blockLim > rem) | ||
2970 | blockLim = (UInt32)rem; | ||
2971 | } | ||
2972 | p->curBlockUnpackRem = blockLim; | ||
2973 | // uncompressed block size cannot be larger than the remaining data size: | ||
2974 | if (type != kBlockType_Compressed) | ||
2975 | { | ||
2976 | if (b0 > blockLim) | ||
2977 | { | ||
2978 | p->isErrorState = True; // SZ_ERROR_UNSUPPORTED; | ||
2979 | return ZSTD2_STATE_BLOCK; | ||
2980 | } | ||
2981 | } | ||
2982 | } | ||
2983 | } | ||
2984 | return ZSTD2_STATE_DATA; | ||
2985 | } | ||
2986 | |||
2987 | if ((unsigned)p->frameState < ZSTD2_STATE_SKIP_DATA) | ||
2988 | { | ||
2989 | UInt32 v; | ||
2990 | if (tempSize != 4) | ||
2991 | return p->frameState; | ||
2992 | v = GetUi32(p->temp); | ||
2993 | if ((unsigned)p->frameState < ZSTD2_STATE_HASH) // == ZSTD2_STATE_SIGNATURE | ||
2994 | { | ||
2995 | if (v == 0xfd2fb528) | ||
2996 | { | ||
2997 | p->tempSize = 0; | ||
2998 | info->num_DataFrames++; | ||
2999 | return ZSTD2_STATE_FRAME_HEADER; | ||
3000 | } | ||
3001 | if ((v & 0xfffffff0) == 0x184d2a50) | ||
3002 | { | ||
3003 | p->tempSize = 0; | ||
3004 | info->num_SkipFrames++; | ||
3005 | return ZSTD2_STATE_SKIP_HEADER; | ||
3006 | } | ||
3007 | p->isErrorState = True; | ||
3008 | return ZSTD2_STATE_SIGNATURE; | ||
3009 | // return ZSTD2_STATE_ERROR; // is not ZSTD stream | ||
3010 | } | ||
3011 | if (p->frameState == ZSTD2_STATE_HASH) | ||
3012 | { | ||
3013 | info->checksum_Defined = True; | ||
3014 | info->checksum = v; | ||
3015 | // #ifndef DISABLE_XXH_CHECK | ||
3016 | if (!p->disableHash) | ||
3017 | { | ||
3018 | if (p->decoder.winPos < ZstdDec_GET_UNPROCESSED_XXH64_SIZE(p)) | ||
3019 | { | ||
3020 | // unexpected code failure | ||
3021 | p->isErrorState = True; | ||
3022 | // SZ_ERROR_FAIL; | ||
3023 | } | ||
3024 | else | ||
3025 | if ((UInt32)Xxh64State_Digest(&p->xxh64, | ||
3026 | p->decoder.win + (p->decoder.winPos - ZstdDec_GET_UNPROCESSED_XXH64_SIZE(p)), | ||
3027 | p->contentProcessed) != v) | ||
3028 | { | ||
3029 | p->hashError = True; | ||
3030 | // return ZSTD2_STATE_ERROR; // hash error | ||
3031 | } | ||
3032 | } | ||
3033 | // #endif | ||
3034 | return ZSTD2_STATE_FINISHED; | ||
3035 | } | ||
3036 | // (p->frameState == ZSTD2_STATE_SKIP_HEADER) | ||
3037 | { | ||
3038 | p->blockSize = v; | ||
3039 | info->skipFrames_Size += v; | ||
3040 | p->tempSize = 0; | ||
3041 | /* we want the caller to be able to know that a frame was | ||
3042 | finished. So we allow the case where | ||
3043 | we have ZSTD2_STATE_SKIP_DATA state with (blockSize == 0). | ||
3044 | */ | ||
3045 | // if (v == 0) return ZSTD2_STATE_SIGNATURE; | ||
3046 | return ZSTD2_STATE_SKIP_DATA; | ||
3047 | } | ||
3048 | } | ||
3049 | |||
3050 | // if (p->frameState == ZSTD2_STATE_FRAME_HEADER) | ||
3051 | { | ||
3052 | unsigned descriptor; | ||
3053 | const Byte *h; | ||
3054 | descriptor = p->temp[0]; | ||
3055 | p->descriptor = (Byte)descriptor; | ||
3056 | if (descriptor & DESCRIPTOR_FLAG_RESERVED) // reserved bit | ||
3057 | { | ||
3058 | p->isErrorState = True; | ||
3059 | return ZSTD2_STATE_FRAME_HEADER; | ||
3060 | // return ZSTD2_STATE_ERROR; | ||
3061 | } | ||
3062 | { | ||
3063 | const unsigned n = DESCRIPTOR_Get_ContentSize_Flag3(descriptor); | ||
3064 | // tempSize -= 1 + ((1u << (n >> 1)) | ((n + 1) & 1)); | ||
3065 | tempSize -= (0x9a563422u >> (n * 4)) & 0xf; | ||
3066 | } | ||
3067 | if (tempSize != (4u >> (3 - DESCRIPTOR_Get_DictionaryId_Flag(descriptor)))) | ||
3068 | return ZSTD2_STATE_FRAME_HEADER; | ||
3069 | |||
3070 | info->descriptor_OR = (Byte)(info->descriptor_OR | descriptor); | ||
3071 | info->descriptor_NOT_OR = (Byte)(info->descriptor_NOT_OR | ~descriptor); | ||
3072 | |||
3073 | h = &p->temp[1]; | ||
3074 | { | ||
3075 | Byte w = 0; | ||
3076 | if ((descriptor & DESCRIPTOR_FLAG_SINGLE) == 0) | ||
3077 | { | ||
3078 | w = *h++; | ||
3079 | if (info->windowDescriptor_MAX < w) | ||
3080 | info->windowDescriptor_MAX = w; | ||
3081 | // info->are_WindowDescriptors = True; | ||
3082 | // info->num_WindowDescriptors++; | ||
3083 | } | ||
3084 | else | ||
3085 | { | ||
3086 | // info->are_SingleSegments = True; | ||
3087 | // info->num_SingleSegments++; | ||
3088 | } | ||
3089 | p->windowDescriptor = w; | ||
3090 | } | ||
3091 | { | ||
3092 | unsigned n = DESCRIPTOR_Get_DictionaryId_Flag(descriptor); | ||
3093 | UInt32 d = 0; | ||
3094 | if (n) | ||
3095 | { | ||
3096 | n = 1u << (n - 1); | ||
3097 | d = GetUi32(h) & ((UInt32)(Int32)-1 >> (32 - 8u * n)); | ||
3098 | h += n; | ||
3099 | } | ||
3100 | p->dictionaryId = d; | ||
3101 | // info->dictionaryId_Cur = d; | ||
3102 | if (d != 0) | ||
3103 | { | ||
3104 | if (info->dictionaryId == 0) | ||
3105 | info->dictionaryId = d; | ||
3106 | else if (info->dictionaryId != d) | ||
3107 | info->are_DictionaryId_Different = True; | ||
3108 | } | ||
3109 | } | ||
3110 | { | ||
3111 | unsigned n = DESCRIPTOR_Get_ContentSize_Flag3(descriptor); | ||
3112 | UInt64 v = 0; | ||
3113 | if (n) | ||
3114 | { | ||
3115 | n >>= 1; | ||
3116 | if (n == 1) | ||
3117 | v = 256; | ||
3118 | v += GetUi64(h) & ((UInt64)(Int64)-1 >> (64 - (8u << n))); | ||
3119 | // info->are_ContentSize_Known = True; | ||
3120 | // info->num_Frames_with_ContentSize++; | ||
3121 | if (info->contentSize_MAX < v) | ||
3122 | info->contentSize_MAX = v; | ||
3123 | info->contentSize_Total += v; | ||
3124 | } | ||
3125 | else | ||
3126 | { | ||
3127 | info->are_ContentSize_Unknown = True; | ||
3128 | // info->num_Frames_without_ContentSize++; | ||
3129 | } | ||
3130 | p->contentSize = v; | ||
3131 | } | ||
3132 | // if ((size_t)(h - p->temp) != headerSize) return ZSTD2_STATE_ERROR; // it's unexpected internal code failure | ||
3133 | p->tempSize = 0; | ||
3134 | |||
3135 | info->checksum_Defined = False; | ||
3136 | /* | ||
3137 | if (descriptor & DESCRIPTOR_FLAG_CHECKSUM) | ||
3138 | info->are_Checksums = True; | ||
3139 | else | ||
3140 | info->are_Non_Checksums = True; | ||
3141 | */ | ||
3142 | |||
3143 | return ZSTD2_STATE_AFTER_HEADER; // ZSTD2_STATE_BLOCK; | ||
3144 | } | ||
3145 | } | ||
3146 | |||
3147 | |||
3148 | static void ZstdDec_Update_XXH(CZstdDec * const p, size_t xxh64_winPos) | ||
3149 | { | ||
3150 | /* | ||
3151 | #ifdef DISABLE_XXH_CHECK | ||
3152 | UNUSED_VAR(data) | ||
3153 | #else | ||
3154 | */ | ||
3155 | if (!p->disableHash && (p->descriptor & DESCRIPTOR_FLAG_CHECKSUM)) | ||
3156 | { | ||
3157 | // const size_t pos = p->xxh64_winPos; | ||
3158 | const size_t size = (p->decoder.winPos - xxh64_winPos) & ~(size_t)31; | ||
3159 | if (size) | ||
3160 | { | ||
3161 | // p->xxh64_winPos = pos + size; | ||
3162 | Xxh64State_UpdateBlocks(&p->xxh64, | ||
3163 | p->decoder.win + xxh64_winPos, | ||
3164 | p->decoder.win + xxh64_winPos + size); | ||
3165 | } | ||
3166 | } | ||
3167 | } | ||
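/* Note: the (& ~(size_t)31) above rounds the hashed range down to whole 32-byte XXH64
   blocks, so the streaming state is updated only in full blocks. For example, if 70 new
   bytes are available, 64 are hashed now and the remaining 6 are consumed by a later
   call or by Xxh64State_Digest() at the end of the frame. */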
3168 | |||
3169 | |||
3170 | /* | ||
3171 | in: | ||
3172 | (winLimit) : is a relaxed limit, where this function is allowed to stop writing decoded data (if possible). | ||
3173 | - this function uses (winLimit) for RAW/RLE blocks only, | ||
3174 | because this function can decode single RAW/RLE block in several different calls. | ||
3175 | - this function DOESN'T use (winLimit) for Compressed blocks, | ||
3176 | because this function decodes full compressed block in single call. | ||
3177 | (CZstdDec1::winPos <= winLimit) | ||
3178 | (winLimit <= CZstdDec1::cycSize). | ||
3179 | Note: if (ds->outBuf_fromCaller) mode is used, then | ||
3180 | { | ||
3181 | (strong_limit) is stored in CZstdDec1::cycSize. | ||
3182 | So (winLimit) is stronger than (strong_limit). | ||
3183 | } | ||
3184 | |||
3185 | exit: | ||
3186 | Note: (CZstdDecState::winPos) will be set by caller after exit of this function. | ||
3187 | |||
3188 | This function can exit for any of these conditions: | ||
3189 | - (frameState == ZSTD2_STATE_AFTER_HEADER) | ||
3190 | - (frameState == ZSTD2_STATE_FINISHED) : frame was finished : (status == ZSTD_STATUS_FINISHED_FRAME) is set | ||
3191 | - finished non-empty non-last block. So (CZstdDec1::winPos_atExit != winPos_atFuncStart). | ||
3192 | - ZSTD_STATUS_NEEDS_MORE_INPUT in src | ||
3193 | - (CZstdDec1::winPos) has reached (winLimit) in a non-finished RAW/RLE block | ||
3194 | |||
3195 | This function decodes no more than one non-empty block. | ||
3196 | So it fulfills the condition at exit: | ||
3197 | (CZstdDec1::winPos_atExit - winPos_atFuncStart <= block_size_max) | ||
3198 | Note: (winPos_atExit > winLimit) is possible in some cases after compressed block decoding. | ||
3199 | |||
3200 | if (ds->outBuf_fromCaller) mode (useAdditionalWinLimit mode) | ||
3201 | { | ||
3202 | then this function uses additional strong limit from (CZstdDec1::cycSize). | ||
3203 | So this function will not write any data after (CZstdDec1::cycSize) | ||
3204 | And it fulfills the condition at exit: | ||
3205 | (CZstdDec1::winPos_atExit <= CZstdDec1::cycSize) | ||
3206 | } | ||
3207 | */ | ||
3208 | static SRes ZstdDec_DecodeBlock(CZstdDec * const p, CZstdDecState * const ds, | ||
3209 | SizeT winLimitAdd) | ||
3210 | { | ||
3211 | const Byte *src = ds->inBuf; | ||
3212 | SizeT * const srcLen = &ds->inPos; | ||
3213 | const SizeT inSize = ds->inLim; | ||
3214 | // const int useAdditionalWinLimit = ds->outBuf_fromCaller ? 1 : 0; | ||
3215 | enum_ZstdStatus * const status = &ds->status; | ||
3216 | CZstdDecInfo * const info = &ds->info; | ||
3217 | SizeT winLimit; | ||
3218 | |||
3219 | const SizeT winPos_atFuncStart = p->decoder.winPos; | ||
3220 | src += *srcLen; | ||
3221 | *status = ZSTD_STATUS_NOT_SPECIFIED; | ||
3222 | |||
3223 | // finishMode = ZSTD_FINISH_ANY; | ||
3224 | if (ds->outSize_Defined) | ||
3225 | { | ||
3226 | if (ds->outSize < ds->outProcessed) | ||
3227 | { | ||
3228 | // p->isAfterSizeMode = 2; // we have extra bytes already | ||
3229 | *status = ZSTD_STATUS_OUT_REACHED; | ||
3230 | return SZ_OK; | ||
3231 | // size = 0; | ||
3232 | } | ||
3233 | else | ||
3234 | { | ||
3235 | // p->outSize >= p->outProcessed | ||
3236 | const UInt64 rem = ds->outSize - ds->outProcessed; | ||
3237 | /* | ||
3238 | if (rem == 0) | ||
3239 | p->isAfterSizeMode = 1; // we have reached exact required size | ||
3240 | */ | ||
3241 | if (winLimitAdd >= rem) | ||
3242 | { | ||
3243 | winLimitAdd = (SizeT)rem; | ||
3244 | // if (p->finishMode) finishMode = ZSTD_FINISH_END; | ||
3245 | } | ||
3246 | } | ||
3247 | } | ||
3248 | |||
3249 | winLimit = p->decoder.winPos + winLimitAdd; | ||
3250 | // (p->decoder.winPos <= winLimit) | ||
3251 | |||
3252 | // while (p->frameState != ZSTD2_STATE_ERROR) | ||
3253 | while (!p->isErrorState) | ||
3254 | { | ||
3255 | SizeT inCur = inSize - *srcLen; | ||
3256 | |||
3257 | if (p->frameState == ZSTD2_STATE_DATA) | ||
3258 | { | ||
3259 | /* (p->decoder.winPos == winPos_atFuncStart) is expected, | ||
3260 | because this function doesn't start a new block | ||
3261 | if it has finished some non-empty block in this call. */ | ||
3262 | if (p->decoder.winPos != winPos_atFuncStart) | ||
3263 | return SZ_ERROR_FAIL; // it's unexpected | ||
3264 | |||
3265 | /* | ||
3266 | if (p->decoder.winPos > winLimit) | ||
3267 | { | ||
3268 | // we can be here, if in this function call | ||
3269 | // - we have extracted non-empty compressed block, and (winPos > winLimit) after that. | ||
3270 | // - we have started new block decoding after that. | ||
3271 | // It's unexpected case, because we exit after non-empty non-last block. | ||
3272 | *status = (inSize == *srcLen) ? | ||
3273 | ZSTD_STATUS_NEEDS_MORE_INPUT : | ||
3274 | ZSTD_STATUS_NOT_FINISHED; | ||
3275 | return SZ_OK; | ||
3276 | } | ||
3277 | */ | ||
3278 | // p->decoder.winPos <= winLimit | ||
3279 | |||
3280 | if (p->blockType != kBlockType_Compressed) | ||
3281 | { | ||
3282 | // it's RLE or RAW block. | ||
3283 | // p->blockSize != 0 | ||
3284 | // winLimit <= p->decoder.cycSize | ||
3285 | /* So here we use more strong (winLimit), even for | ||
3286 | (ds->outBuf_fromCaller) mode. */ | ||
3287 | SizeT outCur = winLimit - p->decoder.winPos; | ||
3288 | { | ||
3289 | const UInt32 rem = p->blockSize; | ||
3290 | if (outCur > rem) | ||
3291 | outCur = rem; | ||
3292 | } | ||
3293 | if (p->blockType == kBlockType_Raw) | ||
3294 | { | ||
3295 | if (outCur > inCur) | ||
3296 | outCur = inCur; | ||
3297 | /* the output buffer is better aligned for the XXH code, | ||
3298 | so we hash the output buffer data instead of the input */ | ||
3299 | // ZstdDec_Update_XXH(p, src, outCur); // for debug: | ||
3300 | memcpy(p->decoder.win + p->decoder.winPos, src, outCur); | ||
3301 | src += outCur; | ||
3302 | *srcLen += outCur; | ||
3303 | } | ||
3304 | else // kBlockType_RLE | ||
3305 | { | ||
3306 | #define RLE_BYTE_INDEX_IN_temp 3 | ||
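| // note: p->temp[RLE_BYTE_INDEX_IN_temp] holds the byte to repeat for this RLE block; | ||
| // it was presumably buffered there together with the 3-byte block header. | ||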
3307 | memset(p->decoder.win + p->decoder.winPos, | ||
3308 | p->temp[RLE_BYTE_INDEX_IN_temp], outCur); | ||
3309 | } | ||
3310 | { | ||
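| // (xxh64_winPos) is the start of window data that has not been fed to XXH64 yet; | ||
| // it is computed before (winPos) is advanced. | ||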
3311 | const SizeT xxh64_winPos = p->decoder.winPos - ZstdDec_GET_UNPROCESSED_XXH64_SIZE(p); | ||
3312 | p->decoder.winPos += outCur; | ||
3313 | p->contentProcessed += outCur; | ||
3314 | ZstdDec_Update_XXH(p, xxh64_winPos); | ||
3315 | } | ||
3316 | // ds->winPos = p->decoder.winPos; // the caller does it instead. for debug: | ||
3317 | UPDATE_TOTAL_OUT(&p->decoder, outCur) | ||
3318 | ds->outProcessed += outCur; | ||
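| // note: the in-condition subtraction below is intentional: | ||
| // a non-zero remainder means the RAW/RLE block is not fully processed yet. | ||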
3319 | if (p->blockSize -= (UInt32)outCur) | ||
3320 | { | ||
3321 | /* | ||
3322 | if (ds->outSize_Defined) | ||
3323 | { | ||
3324 | if (ds->outSize <= ds->outProcessed) ds->isAfterSizeMode = (enum_ZstdStatus) | ||
3325 | (ds->outSize == ds->outProcessed ? 1u: 2u); | ||
3326 | } | ||
3327 | */ | ||
3328 | *status = (enum_ZstdStatus) | ||
3329 | (ds->outSize_Defined && ds->outSize <= ds->outProcessed ? | ||
3330 | ZSTD_STATUS_OUT_REACHED : (p->blockType == kBlockType_Raw && inSize == *srcLen) ? | ||
3331 | ZSTD_STATUS_NEEDS_MORE_INPUT : | ||
3332 | ZSTD_STATUS_NOT_FINISHED); | ||
3333 | return SZ_OK; | ||
3334 | } | ||
3335 | } | ||
3336 | else // kBlockType_Compressed | ||
3337 | { | ||
3338 | // p->blockSize != 0 | ||
3339 | // (uncompressed_size_of_block == 0) is allowed | ||
3340 | // (p->curBlockUnpackRem == 0) is allowed | ||
3341 | /* | ||
3342 | if (p->decoder.winPos >= winLimit) | ||
3343 | { | ||
3344 | if (p->decoder.winPos != winPos_atFuncStart) | ||
3345 | { | ||
3346 | // it's unexpected case | ||
3347 | // We already have some data in finished blocks in this function call. | ||
3348 | // So we don't decompress new block after (>=winLimit), | ||
3349 | // even if it's empty block. | ||
3350 | *status = (inSize == *srcLen) ? | ||
3351 | ZSTD_STATUS_NEEDS_MORE_INPUT : | ||
3352 | ZSTD_STATUS_NOT_FINISHED; | ||
3353 | return SZ_OK; | ||
3354 | } | ||
3355 | // (p->decoder.winPos == winLimit == winPos_atFuncStart) | ||
3356 | // we will decode current block, because that current | ||
3357 | // block can be empty block and we want to make some visible | ||
3358 | // change of (src) stream after function start. | ||
3359 | } | ||
3360 | */ | ||
3361 | /* | ||
3362 | if (ds->outSize_Defined && ds->outSize < ds->outProcessed) | ||
3363 | { | ||
3364 | // we don't want to start new block, if we have more extra decoded bytes already | ||
3365 | *status = ZSTD_STATUS_OUT_REACHED; | ||
3366 | return SZ_OK; | ||
3367 | } | ||
3368 | */ | ||
3369 | { | ||
3370 | const Byte *comprStream; | ||
3371 | size_t afterAvail; | ||
3372 | UInt32 inTempPos = p->inTempPos; | ||
3373 | const UInt32 rem = p->blockSize - inTempPos; | ||
3374 | // rem != 0 | ||
3375 | if (inTempPos != 0 // (inTemp) buffer already contains some input data | ||
3376 | || inCur < rem // available input data size is smaller than compressed block size | ||
3377 | || ZstdDec1_NeedTempBufferForInput(*srcLen, src, rem)) | ||
3378 | { | ||
3379 | if (inCur > rem) | ||
3380 | inCur = rem; | ||
3381 | if (inCur) | ||
3382 | { | ||
3383 | STAT_INC(g_Num_Blocks_memcpy) | ||
3384 | // we clear data for backward lookahead reading | ||
3385 | if (inTempPos == 0) | ||
3386 | memset(p->inTemp + kTempBuffer_PreSize - MAX_BACKWARD_DEPTH, 0, MAX_BACKWARD_DEPTH); | ||
3387 | // { unsigned y = 0; for(;y < 1000; y++) | ||
3388 | memcpy(p->inTemp + inTempPos + kTempBuffer_PreSize, src, inCur); | ||
3389 | // } | ||
3390 | src += inCur; | ||
3391 | *srcLen += inCur; | ||
3392 | inTempPos += (UInt32)inCur; | ||
3393 | p->inTempPos = inTempPos; | ||
3394 | } | ||
3395 | if (inTempPos != p->blockSize) | ||
3396 | { | ||
3397 | *status = ZSTD_STATUS_NEEDS_MORE_INPUT; | ||
3398 | return SZ_OK; | ||
3399 | } | ||
3400 | #if COPY_CHUNK_SIZE > 1 | ||
3401 | memset(p->inTemp + kTempBuffer_PreSize + inTempPos, 0, COPY_CHUNK_SIZE); | ||
3402 | #endif | ||
3403 | comprStream = p->inTemp + kTempBuffer_PreSize; | ||
3404 | afterAvail = k_Lit_AfterAvail; | ||
3405 | // we don't want to read uninitialized data or junk in CopyMatch(): | ||
3406 | } | ||
3407 | else | ||
3408 | { | ||
3409 | // inCur >= rem | ||
3410 | // we use direct decoding from (src) buffer: | ||
3411 | afterAvail = inCur - rem; | ||
3412 | comprStream = src; | ||
3413 | src += rem; | ||
3414 | *srcLen += rem; | ||
3415 | } | ||
3416 | |||
3417 | #ifdef Z7_ZSTD_DEC_USE_CHECK_OF_NEED_TEMP | ||
3418 | ZstdDec1_NeedTempBufferForInput(*srcLen, comprStream, p->blockSize); | ||
3419 | #endif | ||
3420 | // printf("\nblockSize=%u", p->blockSize); | ||
3421 | // printf("%x\n", (unsigned)p->contentProcessed); | ||
3422 | STAT_INC(g_Num_Blocks_Compressed) | ||
3423 | { | ||
3424 | SRes sres; | ||
3425 | const size_t winPos = p->decoder.winPos; | ||
3426 | /* | ||
3427 | if (useAdditionalWinLimit), we use a strong unpack limit: the smallest of | ||
3428 | - limit from stream : (curBlockUnpackRem) | ||
3429 | - limit from caller : (cycSize - winPos) | ||
3430 | if (!useAdditionalWinLimit), we use only the relaxed limit: | ||
3431 | - limit from stream : (curBlockUnpackRem) | ||
3432 | */ | ||
3433 | SizeT outLimit = p->curBlockUnpackRem; | ||
3434 | if (ds->outBuf_fromCaller) | ||
3435 | // if (useAdditionalWinLimit) | ||
3436 | { | ||
3437 | const size_t limit = p->decoder.cycSize - winPos; | ||
3438 | if (outLimit > limit) | ||
3439 | outLimit = limit; | ||
3440 | } | ||
3441 | sres = ZstdDec1_DecodeBlock(&p->decoder, | ||
3442 | comprStream, p->blockSize, afterAvail, outLimit); | ||
3443 | // ds->winPos = p->decoder.winPos; // the caller does it instead. for debug: | ||
3444 | if (sres) | ||
3445 | { | ||
3446 | p->isErrorState = True; | ||
3447 | return sres; | ||
3448 | } | ||
3449 | { | ||
3450 | const SizeT xxh64_winPos = winPos - ZstdDec_GET_UNPROCESSED_XXH64_SIZE(p); | ||
3451 | const size_t num = p->decoder.winPos - winPos; | ||
3452 | ds->outProcessed += num; | ||
3453 | p->contentProcessed += num; | ||
3454 | ZstdDec_Update_XXH(p, xxh64_winPos); | ||
3455 | } | ||
3456 | } | ||
3457 | // printf("\nwinPos=%x", (int)(unsigned)p->decoder.winPos); | ||
3458 | } | ||
3459 | } | ||
3460 | |||
3461 | /* | ||
3462 | if (ds->outSize_Defined) | ||
3463 | { | ||
3464 | if (ds->outSize <= ds->outProcessed) ds->isAfterSizeMode = (enum_ZstdStatus) | ||
3465 | (ds->outSize == ds->outProcessed ? 1u: 2u); | ||
3466 | } | ||
3467 | */ | ||
3468 | |||
3469 | if (!ZSTD_DEC_IS_LAST_BLOCK(p)) | ||
3470 | { | ||
3471 | p->frameState = ZSTD2_STATE_BLOCK; | ||
3472 | if (ds->outSize_Defined && ds->outSize < ds->outProcessed) | ||
3473 | { | ||
3474 | *status = ZSTD_STATUS_OUT_REACHED; | ||
3475 | return SZ_OK; | ||
3476 | } | ||
3477 | // we exit only if (winPos) was changed in this function call: | ||
3478 | if (p->decoder.winPos != winPos_atFuncStart) | ||
3479 | { | ||
3480 | // decoded block was not empty. So we exit: | ||
3481 | *status = (enum_ZstdStatus)( | ||
3482 | (inSize == *srcLen) ? | ||
3483 | ZSTD_STATUS_NEEDS_MORE_INPUT : | ||
3484 | ZSTD_STATUS_NOT_FINISHED); | ||
3485 | return SZ_OK; | ||
3486 | } | ||
3487 | // (p->decoder.winPos == winPos_atFuncStart) | ||
3488 | // so current decoded block was empty. | ||
3489 | // we will try to decode more blocks in this function. | ||
3490 | continue; | ||
3491 | } | ||
3492 | |||
3493 | // decoded block was last in frame | ||
3494 | if (p->descriptor & DESCRIPTOR_FLAG_CHECKSUM) | ||
3495 | { | ||
3496 | p->frameState = ZSTD2_STATE_HASH; | ||
3497 | if (ds->outSize_Defined && ds->outSize < ds->outProcessed) | ||
3498 | { | ||
3499 | *status = ZSTD_STATUS_OUT_REACHED; | ||
3500 | return SZ_OK; // this return can be disabled, if you want | ||
3501 | /* We want to get the same return codes for any input buffer size. | ||
3502 | We want to get the faster ZSTD_STATUS_OUT_REACHED status. | ||
3503 | So we exit with ZSTD_STATUS_OUT_REACHED here, | ||
3504 | instead of the ZSTD2_STATE_HASH and ZSTD2_STATE_FINISHED processing | ||
3505 | that depends on the input buffer size and that can set | ||
3506 | ZSTD_STATUS_NEEDS_MORE_INPUT or return SZ_ERROR_DATA or SZ_ERROR_CRC. | ||
3507 | */ | ||
3508 | } | ||
3509 | } | ||
3510 | else | ||
3511 | { | ||
3512 | /* ZSTD2_STATE_FINISHED processing doesn't depend on the input buffer */ | ||
3513 | p->frameState = ZSTD2_STATE_FINISHED; | ||
3514 | } | ||
3515 | /* | ||
3516 | p->frameState = (p->descriptor & DESCRIPTOR_FLAG_CHECKSUM) ? | ||
3517 | ZSTD2_STATE_HASH : | ||
3518 | ZSTD2_STATE_FINISHED; | ||
3519 | */ | ||
3520 | /* it's required to process the ZSTD2_STATE_FINISHED state in this function call, | ||
3521 | because we must check contentSize and hashError in the ZSTD2_STATE_FINISHED code, | ||
3522 | while the caller can reinit the full state once it sees ZSTD2_STATE_FINISHED. | ||
3523 | So we can't exit from the function here. */ | ||
3524 | continue; | ||
3525 | } | ||
3526 | |||
3527 | if (p->frameState == ZSTD2_STATE_FINISHED) | ||
3528 | { | ||
3529 | *status = ZSTD_STATUS_FINISHED_FRAME; | ||
3530 | if (DESCRIPTOR_Is_ContentSize_Defined(p->descriptor) | ||
3531 | && p->contentSize != p->contentProcessed) | ||
3532 | return SZ_ERROR_DATA; | ||
3533 | if (p->hashError) // for debug | ||
3534 | return SZ_ERROR_CRC; | ||
3535 | return SZ_OK; | ||
3536 | // p->frameState = ZSTD2_STATE_SIGNATURE; | ||
3537 | // continue; | ||
3538 | } | ||
3539 | |||
3540 | if (p->frameState == ZSTD2_STATE_AFTER_HEADER) | ||
3541 | return SZ_OK; // we need memory allocation for that state | ||
3542 | |||
3543 | if (p->frameState == ZSTD2_STATE_SKIP_DATA) | ||
3544 | { | ||
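| // skippable frame: consume the remaining (p->blockSize) payload bytes without producing output. | ||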
3545 | UInt32 blockSize = p->blockSize; | ||
3546 | // (blockSize == 0) is possible | ||
3547 | if (inCur > blockSize) | ||
3548 | inCur = blockSize; | ||
3549 | src += inCur; | ||
3550 | *srcLen += inCur; | ||
3551 | blockSize -= (UInt32)inCur; | ||
3552 | p->blockSize = blockSize; | ||
3553 | if (blockSize == 0) | ||
3554 | { | ||
3555 | p->frameState = ZSTD2_STATE_SIGNATURE; | ||
3556 | // continue; // for debug: we can continue without return to caller. | ||
3557 | // we notify the caller that the skip frame was finished: | ||
3558 | *status = ZSTD_STATUS_FINISHED_FRAME; | ||
3559 | return SZ_OK; | ||
3560 | } | ||
3561 | // blockSize != 0 | ||
3562 | // (inCur) was smaller than previous value of p->blockSize. | ||
3563 | // (inSize == *srcLen) now | ||
3564 | *status = ZSTD_STATUS_NEEDS_MORE_INPUT; | ||
3565 | return SZ_OK; | ||
3566 | } | ||
3567 | |||
3568 | if (inCur == 0) | ||
3569 | { | ||
3570 | *status = ZSTD_STATUS_NEEDS_MORE_INPUT; | ||
3571 | return SZ_OK; | ||
3572 | } | ||
3573 | |||
3574 | { | ||
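| // feed the next byte to the frame/block header state machine: | ||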
3575 | (*srcLen)++; | ||
3576 | p->frameState = ZstdDec_UpdateState(p, *src++, info); | ||
3577 | } | ||
3578 | } | ||
3579 | |||
3580 | *status = ZSTD_STATUS_NOT_SPECIFIED; | ||
3581 | p->isErrorState = True; | ||
3582 | // p->frameState = ZSTD2_STATE_ERROR; | ||
3583 | // if (p->frameState = ZSTD2_STATE_SIGNATURE) return SZ_ERROR_NO_ARCHIVE | ||
3584 | return SZ_ERROR_DATA; | ||
3585 | } | ||
3586 | |||
3587 | |||
3588 | |||
3589 | |||
3590 | SRes ZstdDec_Decode(CZstdDecHandle dec, CZstdDecState *p) | ||
3591 | { | ||
3592 | p->needWrite_Size = 0; | ||
3593 | p->status = ZSTD_STATUS_NOT_SPECIFIED; | ||
3594 | dec->disableHash = p->disableHash; | ||
3595 | |||
3596 | if (p->outBuf_fromCaller) | ||
3597 | { | ||
3598 | dec->decoder.win = p->outBuf_fromCaller; | ||
3599 | dec->decoder.cycSize = p->outBufSize_fromCaller; | ||
3600 | } | ||
3601 | |||
3602 | // p->winPos = dec->decoder.winPos; | ||
3603 | |||
3604 | for (;;) | ||
3605 | { | ||
3606 | SizeT winPos, size; | ||
3607 | // SizeT outProcessed; | ||
3608 | SRes res; | ||
3609 | |||
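| // (p->wrPos) is the flush position maintained by the caller; it must never be ahead of (winPos). | ||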
3610 | if (p->wrPos > dec->decoder.winPos) | ||
3611 | return SZ_ERROR_FAIL; | ||
3612 | |||
3613 | if (dec->frameState == ZSTD2_STATE_FINISHED) | ||
3614 | { | ||
3615 | if (!p->outBuf_fromCaller) | ||
3616 | { | ||
3617 | // we need to set positions to zero for new frame. | ||
3618 | if (p->wrPos != dec->decoder.winPos) | ||
3619 | { | ||
3620 | /* We have already asked the caller to flush all data | ||
3621 | with (p->needWrite_Size) and (ZSTD_STATUS_FINISHED_FRAME) status. | ||
3622 | So it's an unexpected case */ | ||
3623 | // p->winPos = dec->decoder.winPos; | ||
3624 | // p->needWrite_Size = dec->decoder.winPos - p->wrPos; // flush size asking | ||
3625 | // return SZ_OK; // ask to flush again | ||
3626 | return SZ_ERROR_FAIL; | ||
3627 | } | ||
3628 | // (p->wrPos == dec->decoder.winPos), and we wrap to zero: | ||
3629 | dec->decoder.winPos = 0; | ||
3630 | p->winPos = 0; | ||
3631 | p->wrPos = 0; | ||
3632 | } | ||
3633 | ZstdDec_Init_ForNewFrame(dec); | ||
3634 | // continue; | ||
3635 | } | ||
3636 | |||
3637 | winPos = dec->decoder.winPos; | ||
3638 | { | ||
3639 | SizeT next = dec->decoder.cycSize; | ||
3640 | /* cycSize == 0, if no buffer has been allocated yet, | ||
3641 | or if (outBuf_fromCaller) mode and (outBufSize_fromCaller == 0) */ | ||
3642 | if (!p->outBuf_fromCaller | ||
3643 | && next | ||
3644 | && next <= winPos | ||
3645 | && dec->isCyclicMode) | ||
3646 | { | ||
3647 | // (0 < decoder.cycSize <= winPos) in isCyclicMode. | ||
3648 | // so we need to wrap (winPos) and (wrPos) over (cycSize). | ||
3649 | const size_t delta = next; | ||
3650 | // (delta) is how many bytes we remove from buffer. | ||
3651 | /* | ||
3652 | // we don't need data older than last (cycSize) bytes. | ||
3653 | size_t delta = winPos - next; // num bytes after (cycSize) | ||
3654 | if (delta <= next) // it's expected case | ||
3655 | delta = next; | ||
3656 | // delta == Max(cycSize, winPos - cycSize) | ||
3657 | */ | ||
3658 | if (p->wrPos < delta) | ||
3659 | { | ||
3660 | // (wrPos < decoder.cycSize) | ||
3661 | // We have already asked the caller to flush the required data | ||
3662 | // p->status = ZSTD_STATUS_NOT_SPECIFIED; | ||
3663 | // p->winPos = winPos; | ||
3664 | // p->needWrite_Size = delta - p->wrPos; // flush size asking | ||
3665 | // return SZ_OK; // ask to flush again | ||
3666 | return SZ_ERROR_FAIL; | ||
3667 | } | ||
3668 | // p->wrPos >= decoder.cycSize | ||
3669 | // we move the extra data after (decoder.cycSize) to the start of the cyclic buffer: | ||
3670 | winPos -= delta; | ||
3671 | if (winPos) | ||
3672 | { | ||
3673 | if (winPos >= delta) | ||
3674 | return SZ_ERROR_FAIL; | ||
3675 | memmove(dec->decoder.win, dec->decoder.win + delta, winPos); | ||
3676 | // printf("\nmemmove processed=%8x winPos=%8x\n", (unsigned)p->outProcessed, (unsigned)dec->decoder.winPos); | ||
3677 | STAT_INC(g_Num_Wrap_memmove_Num) | ||
3678 | STAT_UPDATE(g_Num_Wrap_memmove_Bytes += (unsigned)winPos;) | ||
3679 | } | ||
3680 | dec->decoder.winPos = winPos; | ||
3681 | p->winPos = winPos; | ||
3682 | p->wrPos -= delta; | ||
3683 | // dec->xxh64_winPos -= delta; | ||
3684 | |||
3685 | // (winPos < delta) | ||
3686 | #ifdef Z7_STD_DEC_USE_AFTER_CYC_BUF | ||
3687 | /* we set the data after cycSize, because | ||
3688 | we don't want to read uninitialized data or junk in CopyMatch(). */ | ||
3689 | memset(dec->decoder.win + next, 0, COPY_CHUNK_SIZE); | ||
3690 | #endif | ||
3691 | |||
3692 | /* | ||
3693 | if (winPos == next) | ||
3694 | { | ||
3695 | if (winPos != p->wrPos) | ||
3696 | { | ||
3697 | // we already requested before to flush full data for that case. | ||
3698 | // but we give the caller a second chance to flush data: | ||
3699 | p->needWrite_Size = winPos - p->wrPos; | ||
3700 | return SZ_OK; | ||
3701 | } | ||
3702 | // (decoder.cycSize == winPos == p->wrPos) | ||
3703 | // so we do second wrapping to zero: | ||
3704 | winPos = 0; | ||
3705 | dec->decoder.winPos = 0; | ||
3706 | p->winPos = 0; | ||
3707 | p->wrPos = 0; | ||
3708 | } | ||
3709 | */ | ||
3710 | // (winPos < next) | ||
3711 | } | ||
3712 | |||
3713 | if (winPos > next) | ||
3714 | return SZ_ERROR_FAIL; // it's unexpected case | ||
3715 | /* | ||
3716 | if (!outBuf_fromCaller && isCyclicMode && cycSize != 0) | ||
3717 | then (winPos < cycSize) | ||
3718 | else (winPos <= cycSize) | ||
3719 | */ | ||
3720 | if (!p->outBuf_fromCaller) | ||
3721 | { | ||
3722 | // this code is optional. We try to optimize write chunk sizes. | ||
3723 | /* (next2) is the expected next write position in the caller, | ||
3724 | if the caller writes in kBlockSizeMax chunks. | ||
3725 | */ | ||
3726 | /* | ||
3727 | const size_t next2 = (winPos + kBlockSizeMax) & (kBlockSizeMax - 1); | ||
3728 | if (winPos < next2 && next2 < next) | ||
3729 | next = next2; | ||
3730 | */ | ||
3731 | } | ||
3732 | size = next - winPos; | ||
3733 | } | ||
3734 | |||
3735 | // note: ZstdDec_DecodeBlock() uses (winLimit = winPos + size) only for RLE and RAW blocks | ||
3736 | res = ZstdDec_DecodeBlock(dec, p, size); | ||
3737 | /* | ||
3738 | after one block decoding: | ||
3739 | if (!outBuf_fromCaller && isCyclicMode && cycSize != 0) | ||
3740 | then (winPos < cycSize + max_block_size) | ||
3741 | else (winPos <= cycSize) | ||
3742 | */ | ||
3743 | |||
3744 | if (!p->outBuf_fromCaller) | ||
3745 | p->win = dec->decoder.win; | ||
3746 | p->winPos = dec->decoder.winPos; | ||
3747 | |||
3748 | // outProcessed = dec->decoder.winPos - winPos; | ||
3749 | // p->outProcessed += outProcessed; | ||
3750 | |||
3751 | if (res != SZ_OK) | ||
3752 | return res; | ||
3753 | |||
3754 | if (dec->frameState != ZSTD2_STATE_AFTER_HEADER) | ||
3755 | { | ||
3756 | if (p->outBuf_fromCaller) | ||
3757 | return SZ_OK; | ||
3758 | { | ||
3759 | // !p->outBuf_fromCaller | ||
3760 | /* | ||
3761 | if (ZSTD_STATUS_FINISHED_FRAME), we request full flushing here, because | ||
3762 | 1) it's simpler to work with allocation and extraction of the next frame, | ||
3763 | 2) it's better to start writing the next new frame at aligned memory | ||
3764 | for faster xxh 64-bit reads. | ||
3765 | */ | ||
3766 | size_t end = dec->decoder.winPos; // end pos for all data flushing | ||
3767 | if (p->status != ZSTD_STATUS_FINISHED_FRAME) | ||
3768 | { | ||
3769 | // we request a flush here only for cases where a wrap of the cyclic buffer may be required in the next call. | ||
3770 | if (!dec->isCyclicMode) | ||
3771 | return SZ_OK; | ||
3772 | // isCyclicMode | ||
3773 | { | ||
3774 | const size_t delta = dec->decoder.cycSize; | ||
3775 | if (end < delta) | ||
3776 | return SZ_OK; // (winPos < cycSize). no need for flush | ||
3777 | // cycSize <= winPos | ||
3778 | // So we ask the caller to flush (cycSize - wrPos) bytes, | ||
3779 | // and then we will wrap the cyclic buffer in the next call | ||
3780 | end = delta; | ||
3781 | } | ||
3782 | } | ||
3783 | p->needWrite_Size = end - p->wrPos; | ||
3784 | } | ||
3785 | return SZ_OK; | ||
3786 | } | ||
3787 | |||
3788 | // ZSTD2_STATE_AFTER_HEADER | ||
3789 | { | ||
3790 | BoolInt useCyclic = False; | ||
3791 | size_t cycSize; | ||
3792 | |||
3793 | // p->status = ZSTD_STATUS_NOT_FINISHED; | ||
3794 | if (dec->dictionaryId != 0) | ||
3795 | { | ||
3796 | /* actually we can try to decode some data, | ||
3797 | because it's possible that some data doesn't use dictionary */ | ||
3798 | // p->status = ZSTD_STATUS_NOT_SPECIFIED; | ||
3799 | return SZ_ERROR_UNSUPPORTED; | ||
3800 | } | ||
3801 | |||
3802 | { | ||
3803 | UInt64 winSize = dec->contentSize; | ||
3804 | UInt64 winSize_Allocate = winSize; | ||
3805 | const unsigned descriptor = dec->descriptor; | ||
3806 | |||
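| /* if the frame is not single-segment, the window size is taken from the window descriptor below; | ||
| cyclic mode is used when the content size is unknown or larger than that window. */ | ||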
3807 | if ((descriptor & DESCRIPTOR_FLAG_SINGLE) == 0) | ||
3808 | { | ||
3809 | const Byte wd = dec->windowDescriptor; | ||
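| /* Window_Descriptor decoding per the zstd format specification: | ||
| Exponent = wd >> 3, Mantissa = wd & 7, | ||
| windowBase = 1 << (10 + Exponent), windowAdd = (windowBase / 8) * Mantissa, | ||
| Window_Size = windowBase + windowAdd = (8 + Mantissa) << (Exponent + 10 - 3). | ||
| For example, wd == 0 gives the minimum window of 1 KiB. */ | ||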
3810 | winSize = (UInt64)(8 + (wd & 7)) << ((wd >> 3) + 10 - 3); | ||
3811 | if (!DESCRIPTOR_Is_ContentSize_Defined(descriptor) | ||
3812 | || winSize_Allocate > winSize) | ||
3813 | { | ||
3814 | winSize_Allocate = winSize; | ||
3815 | useCyclic = True; | ||
3816 | } | ||
3817 | } | ||
3818 | /* | ||
3819 | else | ||
3820 | { | ||
3821 | if (p->info.singleSegment_ContentSize_MAX < winSize) | ||
3822 | p->info.singleSegment_ContentSize_MAX = winSize; | ||
3823 | // p->info.num_SingleSegments++; | ||
3824 | } | ||
3825 | */ | ||
3826 | if (p->info.windowSize_MAX < winSize) | ||
3827 | p->info.windowSize_MAX = winSize; | ||
3828 | if (p->info.windowSize_Allocate_MAX < winSize_Allocate) | ||
3829 | p->info.windowSize_Allocate_MAX = winSize_Allocate; | ||
3830 | /* | ||
3831 | winSize_Allocate is MIN(content_size, window_size_from_descriptor). | ||
3832 | Even if (content_size < window_size_from_descriptor), | ||
3833 | original-zstd still uses (window_size_from_descriptor) to check that decoding is allowed. | ||
3834 | We try to follow original-zstd, and here we check (winSize) instead of (winSize_Allocate). | ||
3835 | */ | ||
3836 | if ( | ||
3837 | // winSize_Allocate // it's relaxed check | ||
3838 | winSize // it's more strict check to be compatible with original-zstd | ||
3839 | > ((UInt64)1 << MAX_WINDOW_SIZE_LOG)) | ||
3840 | return SZ_ERROR_UNSUPPORTED; // SZ_ERROR_MEM | ||
3841 | cycSize = (size_t)winSize_Allocate; | ||
3842 | if (cycSize != winSize_Allocate) | ||
3843 | return SZ_ERROR_MEM; | ||
3844 | // cycSize <= winSize | ||
3845 | /* later we will use (CZstdDec1::winSize) to check match offsets and block sizes. | ||
3846 | if (there is a window descriptor) | ||
3847 | { | ||
3848 | We will check the block size against (window_size_from_descriptor) instead of (winSize_Allocate). | ||
3849 | Does original-zstd also do it that way? | ||
3850 | } | ||
3851 | Here we must reduce the full real 64-bit (winSize) to size_t for (CZstdDec1::winSize). | ||
3852 | Also we don't want too large values for (CZstdDec1::winSize). | ||
3853 | our (CZstdDec1::winSize) will meet the condition: | ||
3854 | (CZstdDec1::winSize < kBlockSizeMax || CZstdDec1::winSize <= cycSize). | ||
3855 | */ | ||
3856 | dec->decoder.winSize = (winSize < kBlockSizeMax) ? (size_t)winSize: cycSize; | ||
3857 | // note: (CZstdDec1::winSize > cycSize) is possible, if (!useCyclic) | ||
3858 | } | ||
3859 | |||
3860 | RINOK(ZstdDec_AllocateMisc(dec)) | ||
3861 | |||
3862 | if (p->outBuf_fromCaller) | ||
3863 | dec->isCyclicMode = False; | ||
3864 | else | ||
3865 | { | ||
3866 | size_t d = cycSize; | ||
3867 | |||
3868 | if (dec->decoder.winPos != p->wrPos) | ||
3869 | return SZ_ERROR_FAIL; | ||
3870 | |||
3871 | dec->decoder.winPos = 0; | ||
3872 | p->wrPos = 0; | ||
3873 | p->winPos = dec->decoder.winPos; | ||
3874 | |||
3875 | /* | ||
3876 | const size_t needWrite = dec->decoder.winPos - p->wrPos; | ||
3877 | if (!needWrite) | ||
3878 | { | ||
3879 | dec->decoder.winPos = 0; | ||
3880 | p->wrPos = 0; | ||
3881 | p->winPos = dec->decoder.winPos; | ||
3882 | } | ||
3883 | */ | ||
3884 | /* if (!useCyclic), we allocate only cycSize = ContentSize. | ||
3885 | But if we wanted to support the case where a new frame starts with (winPos != 0), | ||
3886 | then we would wrap over zero, and we would still need | ||
3887 | to set (useCyclic) and allocate additional buffer space. | ||
3888 | For now we don't allow a new frame to start with (winPos != 0), | ||
3889 | so (dec->decoder.winPos == 0), | ||
3890 | and we can use (!useCyclic) with reduced buffer sizes. | ||
3891 | */ | ||
3892 | /* | ||
3893 | if (dec->decoder->winPos != 0) | ||
3894 | useCyclic = True; | ||
3895 | */ | ||
3896 | |||
3897 | if (useCyclic) | ||
3898 | { | ||
3899 | /* the cyclic buffer size must be at least (COPY_CHUNK_SIZE - 1) bytes | ||
3900 | larger than the window size, because CopyMatch() can write additional | ||
3901 | (COPY_CHUNK_SIZE - 1) bytes and overwrite the oldest data in the cyclic buffer. | ||
3902 | But for performance reasons we align (cycSize) to (kBlockSizeMax). | ||
3903 | Also we must provide (cycSize >= max_decoded_data_after_cycSize), | ||
3904 | because after the data move that wraps over zero, we must provide (winPos < cycSize). | ||
3905 | */ | ||
3906 | const size_t alignSize = kBlockSizeMax; | ||
3907 | /* here we add (1 << 7) instead of (COPY_CHUNK_SIZE - 1), because | ||
3908 | we want to get the same (cycSize) for different COPY_CHUNK_SIZE values. */ | ||
3909 | // cycSize += (COPY_CHUNK_SIZE - 1) + (alignSize - 1); // for debug : we can get smallest (cycSize) | ||
3910 | cycSize += (1 << 7) + alignSize; | ||
3911 | cycSize &= ~(size_t)(alignSize - 1); | ||
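| // now (cycSize) is a multiple of (kBlockSizeMax) and exceeds (winSize_Allocate) by at least 128 bytes. | ||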
3912 | // cycSize must be aligned to 32, because xxh requires 32-byte blocks. | ||
3913 | // cycSize += 12345; // for debug | ||
3914 | // cycSize += 1 << 10; // for debug | ||
3915 | // cycSize += 32; // for debug | ||
3916 | // cycSize += kBlockSizeMax; // for debug | ||
3917 | if (cycSize < d) | ||
3918 | return SZ_ERROR_MEM; | ||
3919 | /* | ||
3920 | in cyclic buffer mode we allow decoding of one additional block | ||
3921 | that exceeds (cycSize). | ||
3922 | So we must allocate additional (kBlockSizeMax) bytes after (cycSize). | ||
3923 | if defined(Z7_STD_DEC_USE_AFTER_CYC_BUF) | ||
3924 | { | ||
3925 | we can read (COPY_CHUNK_SIZE - 1) bytes after (cycSize), | ||
3926 | but we already allocate an additional kBlockSizeMax that | ||
3927 | is larger than COPY_CHUNK_SIZE. | ||
3928 | So we don't need additional space of COPY_CHUNK_SIZE after (cycSize). | ||
3929 | } | ||
3930 | */ | ||
3931 | /* | ||
3932 | #ifdef Z7_STD_DEC_USE_AFTER_CYC_BUF | ||
3933 | d = cycSize + (1 << 7); // we must add at least (COPY_CHUNK_SIZE - 1) | ||
3934 | #endif | ||
3935 | */ | ||
3936 | d = cycSize + kBlockSizeMax; | ||
3937 | if (d < cycSize) | ||
3938 | return SZ_ERROR_MEM; | ||
3939 | } | ||
3940 | |||
3941 | { | ||
3942 | const size_t kMinWinAllocSize = 1 << 12; | ||
3943 | if (d < kMinWinAllocSize) | ||
3944 | d = kMinWinAllocSize; | ||
3945 | } | ||
3946 | |||
3947 | if (d > dec->winBufSize_Allocated) | ||
3948 | { | ||
3949 | /* | ||
3950 | if (needWrite) | ||
3951 | { | ||
3952 | p->needWrite_Size = needWrite; | ||
3953 | return SZ_OK; | ||
3954 | // return SZ_ERROR_FAIL; | ||
3955 | } | ||
3956 | */ | ||
3957 | |||
3958 | if (dec->winBufSize_Allocated != 0) | ||
3959 | { | ||
3960 | const size_t k_extra = (useCyclic || d >= (1u << 20)) ? | ||
3961 | 2 * kBlockSizeMax : 0; | ||
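| // the loop below grows (d) to the smallest ((size_t)1 << i) + k_extra that covers it, | ||
| // presumably to reduce reallocations when window sizes vary between frames: | ||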
3962 | unsigned i = useCyclic ? 17 : 12; | ||
3963 | for (; i < sizeof(size_t) * 8; i++) | ||
3964 | { | ||
3965 | const size_t d2 = ((size_t)1 << i) + k_extra; | ||
3966 | if (d2 >= d) | ||
3967 | { | ||
3968 | d = d2; | ||
3969 | break; | ||
3970 | } | ||
3971 | } | ||
3972 | } | ||
3973 | // RINOK(ZstdDec_AllocateWindow(dec, d)) | ||
3974 | ZstdDec_FreeWindow(dec); | ||
3975 | dec->win_Base = (Byte *)ISzAlloc_Alloc(dec->alloc_Big, d); | ||
3976 | if (!dec->win_Base) | ||
3977 | return SZ_ERROR_MEM; | ||
3978 | dec->decoder.win = dec->win_Base; | ||
3979 | dec->winBufSize_Allocated = d; | ||
3980 | } | ||
3981 | /* | ||
3982 | else | ||
3983 | { | ||
3984 | // for non-cyclic mode we want to flush data, and set winPos = 0 | ||
3985 | if (needWrite) | ||
3986 | { | ||
3987 | if (!useCyclic || dec->decoder.winPos >= cycSize) | ||
3988 | { | ||
3989 | p->needWrite_Size = needWrite; | ||
3990 | return SZ_OK; | ||
3991 | // return SZ_ERROR_FAIL; | ||
3992 | } | ||
3993 | } | ||
3994 | } | ||
3995 | */ | ||
3996 | |||
3997 | dec->decoder.cycSize = cycSize; | ||
3998 | p->win = dec->decoder.win; | ||
3999 | // p->cycSize = dec->decoder.cycSize; | ||
4000 | dec->isCyclicMode = (Byte)useCyclic; | ||
4001 | } // (!p->outBuf_fromCaller) end | ||
4002 | |||
4003 | // p->winPos = dec->decoder.winPos; | ||
4004 | dec->frameState = ZSTD2_STATE_BLOCK; | ||
4005 | // continue; | ||
4006 | } // ZSTD2_STATE_AFTER_HEADER end | ||
4007 | } | ||
4008 | } | ||
4009 | |||
4010 | |||
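| /* ZstdDec_GetResInfo() converts the final decoder state into (stat): | ||
| (extraSize) is the number of trailing bytes buffered in (temp) that are not part of a frame, | ||
| (is_NonFinishedFrame) is set if decoding stopped inside a frame, and (decode_SRes) refines (res): | ||
| SZ_ERROR_NO_ARCHIVE if no frame was found at all, SZ_ERROR_INPUT_EOF if an unfinished frame needed more input. */ | ||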
4011 | void ZstdDec_GetResInfo(const CZstdDec *dec, | ||
4012 | const CZstdDecState *p, | ||
4013 | SRes res, | ||
4014 | CZstdDecResInfo *stat) | ||
4015 | { | ||
4016 | // ZstdDecInfo_CLEAR(stat); | ||
4017 | stat->extraSize = 0; | ||
4018 | stat->is_NonFinishedFrame = False; | ||
4019 | if (dec->frameState != ZSTD2_STATE_FINISHED) | ||
4020 | { | ||
4021 | if (dec->frameState == ZSTD2_STATE_SIGNATURE) | ||
4022 | { | ||
4023 | stat->extraSize = (Byte)dec->tempSize; | ||
4024 | if (ZstdDecInfo_GET_NUM_FRAMES(&p->info) == 0) | ||
4025 | res = SZ_ERROR_NO_ARCHIVE; | ||
4026 | } | ||
4027 | else | ||
4028 | { | ||
4029 | stat->is_NonFinishedFrame = True; | ||
4030 | if (res == SZ_OK && p->status == ZSTD_STATUS_NEEDS_MORE_INPUT) | ||
4031 | res = SZ_ERROR_INPUT_EOF; | ||
4032 | } | ||
4033 | } | ||
4034 | stat->decode_SRes = res; | ||
4035 | } | ||
4036 | |||
4037 | |||
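| /* ZstdDec_ReadUnusedFromInBuf() copies input bytes that remain buffered in (dec->temp) | ||
| (starting from (afterDecoding_tempPos)) back to the caller and returns the number of bytes copied; | ||
| only the ZSTD2_STATE_SIGNATURE state can hold such unused buffered bytes. */ | ||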
4038 | size_t ZstdDec_ReadUnusedFromInBuf( | ||
4039 | CZstdDecHandle dec, | ||
4040 | size_t afterDecoding_tempPos, | ||
4041 | void *data, size_t size) | ||
4042 | { | ||
4043 | size_t processed = 0; | ||
4044 | if (dec->frameState == ZSTD2_STATE_SIGNATURE) | ||
4045 | { | ||
4046 | Byte *dest = (Byte *)data; | ||
4047 | const size_t tempSize = dec->tempSize; | ||
4048 | while (afterDecoding_tempPos < tempSize) | ||
4049 | { | ||
4050 | if (size == 0) | ||
4051 | break; | ||
4052 | size--; | ||
4053 | *dest++ = dec->temp[afterDecoding_tempPos++]; | ||
4054 | processed++; | ||
4055 | } | ||
4056 | } | ||
4057 | return processed; | ||
4058 | } | ||
4059 | |||
4060 | |||
4061 | void ZstdDecState_Clear(CZstdDecState *p) | ||
4062 | { | ||
4063 | memset(p, 0 , sizeof(*p)); | ||
4064 | } | ||