diff options
author | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2021-12-27 00:00:00 +0000 |
---|---|---|
committer | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2022-03-18 15:35:13 +0500 |
commit | f19f813537c7aea1c20749c914e756b54a9c3cf5 (patch) | |
tree | 816ba62ca7c0fa19f2eb46d9e9d6f7dd7c3a744d /C/Lzma2Dec.c | |
parent | 98e06a519b63b81986abe76d28887f6984a7732b (diff) | |
download | 7zip-21.07.tar.gz 7zip-21.07.tar.bz2 7zip-21.07.zip |
'21.07'21.07
Diffstat (limited to 'C/Lzma2Dec.c')
-rw-r--r-- | C/Lzma2Dec.c | 489 |
1 files changed, 489 insertions, 0 deletions
diff --git a/C/Lzma2Dec.c b/C/Lzma2Dec.c new file mode 100644 index 0000000..ac970a8 --- /dev/null +++ b/C/Lzma2Dec.c | |||
@@ -0,0 +1,489 @@ | |||
1 | /* Lzma2Dec.c -- LZMA2 Decoder | ||
2 | 2021-02-09 : Igor Pavlov : Public domain */ | ||
3 | |||
4 | /* #define SHOW_DEBUG_INFO */ | ||
5 | |||
6 | #include "Precomp.h" | ||
7 | |||
8 | #ifdef SHOW_DEBUG_INFO | ||
9 | #include <stdio.h> | ||
10 | #endif | ||
11 | |||
12 | #include <string.h> | ||
13 | |||
14 | #include "Lzma2Dec.h" | ||
15 | |||
16 | /* | ||
17 | 00000000 - End of data | ||
18 | 00000001 U U - Uncompressed, reset dic, need reset state and set new prop | ||
19 | 00000010 U U - Uncompressed, no reset | ||
20 | 100uuuuu U U P P - LZMA, no reset | ||
21 | 101uuuuu U U P P - LZMA, reset state | ||
22 | 110uuuuu U U P P S - LZMA, reset state + set new prop | ||
23 | 111uuuuu U U P P S - LZMA, reset state + set new prop, reset dic | ||
24 | |||
25 | u, U - Unpack Size | ||
26 | P - Pack Size | ||
27 | S - Props | ||
28 | */ | ||
29 | |||
/* Control byte value of an uncompressed chunk that also resets the dictionary. */
#define LZMA2_CONTROL_COPY_RESET_DIC 1

/* True when bit 7 of the stored control byte is clear, i.e. the current
   chunk carries raw (uncompressed) bytes rather than an LZMA stream. */
#define LZMA2_IS_UNCOMPRESSED_STATE(p) (((p)->control & 0x80) == 0)

/* Largest (lc + lp) sum accepted when a chunk supplies a new props byte. */
#define LZMA2_LCLP_MAX 4

/* Dictionary size encoded by the one-byte LZMA2 property:
   (2 | (p & 1)) << (p / 2 + 11)  ->  4 KiB, 6 KiB, 8 KiB, 12 KiB, ... */
#define LZMA2_DIC_SIZE_FROM_PROP(p) (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11))

/* PRF(x) expands to x only when SHOW_DEBUG_INFO is defined; otherwise to nothing. */
#ifndef SHOW_DEBUG_INFO
#define PRF(x)
#else
#define PRF(x) x
#endif
42 | |||
/* Positions of the LZMA2 chunk parser. The numeric values are the same ones
   the compiler would assign implicitly; they are spelled out for readability. */
typedef enum
{
  LZMA2_STATE_CONTROL   = 0,  /* next byte is a chunk control byte */
  LZMA2_STATE_UNPACK0   = 1,  /* next byte: bits 8..15 of (unpack size - 1) */
  LZMA2_STATE_UNPACK1   = 2,  /* next byte: bits 0..7 of (unpack size - 1) */
  LZMA2_STATE_PACK0     = 3,  /* next byte: bits 8..15 of (pack size - 1) */
  LZMA2_STATE_PACK1     = 4,  /* next byte: bits 0..7 of (pack size - 1) */
  LZMA2_STATE_PROP      = 5,  /* next byte: lc/lp/pb properties */
  LZMA2_STATE_DATA      = 6,  /* header complete, chunk payload not started yet */
  LZMA2_STATE_DATA_CONT = 7,  /* inside the chunk payload */
  LZMA2_STATE_FINISHED  = 8,  /* control byte 0 (end of data) was read */
  LZMA2_STATE_ERROR     = 9   /* invalid stream; no further progress is made */
} ELzma2State;
56 | |||
57 | static SRes Lzma2Dec_GetOldProps(Byte prop, Byte *props) | ||
58 | { | ||
59 | UInt32 dicSize; | ||
60 | if (prop > 40) | ||
61 | return SZ_ERROR_UNSUPPORTED; | ||
62 | dicSize = (prop == 40) ? 0xFFFFFFFF : LZMA2_DIC_SIZE_FROM_PROP(prop); | ||
63 | props[0] = (Byte)LZMA2_LCLP_MAX; | ||
64 | props[1] = (Byte)(dicSize); | ||
65 | props[2] = (Byte)(dicSize >> 8); | ||
66 | props[3] = (Byte)(dicSize >> 16); | ||
67 | props[4] = (Byte)(dicSize >> 24); | ||
68 | return SZ_OK; | ||
69 | } | ||
70 | |||
71 | SRes Lzma2Dec_AllocateProbs(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc) | ||
72 | { | ||
73 | Byte props[LZMA_PROPS_SIZE]; | ||
74 | RINOK(Lzma2Dec_GetOldProps(prop, props)); | ||
75 | return LzmaDec_AllocateProbs(&p->decoder, props, LZMA_PROPS_SIZE, alloc); | ||
76 | } | ||
77 | |||
78 | SRes Lzma2Dec_Allocate(CLzma2Dec *p, Byte prop, ISzAllocPtr alloc) | ||
79 | { | ||
80 | Byte props[LZMA_PROPS_SIZE]; | ||
81 | RINOK(Lzma2Dec_GetOldProps(prop, props)); | ||
82 | return LzmaDec_Allocate(&p->decoder, props, LZMA_PROPS_SIZE, alloc); | ||
83 | } | ||
84 | |||
85 | void Lzma2Dec_Init(CLzma2Dec *p) | ||
86 | { | ||
87 | p->state = LZMA2_STATE_CONTROL; | ||
88 | p->needInitLevel = 0xE0; | ||
89 | p->isExtraMode = False; | ||
90 | p->unpackSize = 0; | ||
91 | |||
92 | // p->decoder.dicPos = 0; // we can use it instead of full init | ||
93 | LzmaDec_Init(&p->decoder); | ||
94 | } | ||
95 | |||
96 | // ELzma2State | ||
97 | static unsigned Lzma2Dec_UpdateState(CLzma2Dec *p, Byte b) | ||
98 | { | ||
99 | switch (p->state) | ||
100 | { | ||
101 | case LZMA2_STATE_CONTROL: | ||
102 | p->isExtraMode = False; | ||
103 | p->control = b; | ||
104 | PRF(printf("\n %8X", (unsigned)p->decoder.dicPos)); | ||
105 | PRF(printf(" %02X", (unsigned)b)); | ||
106 | if (b == 0) | ||
107 | return LZMA2_STATE_FINISHED; | ||
108 | if (LZMA2_IS_UNCOMPRESSED_STATE(p)) | ||
109 | { | ||
110 | if (b == LZMA2_CONTROL_COPY_RESET_DIC) | ||
111 | p->needInitLevel = 0xC0; | ||
112 | else if (b > 2 || p->needInitLevel == 0xE0) | ||
113 | return LZMA2_STATE_ERROR; | ||
114 | } | ||
115 | else | ||
116 | { | ||
117 | if (b < p->needInitLevel) | ||
118 | return LZMA2_STATE_ERROR; | ||
119 | p->needInitLevel = 0; | ||
120 | p->unpackSize = (UInt32)(b & 0x1F) << 16; | ||
121 | } | ||
122 | return LZMA2_STATE_UNPACK0; | ||
123 | |||
124 | case LZMA2_STATE_UNPACK0: | ||
125 | p->unpackSize |= (UInt32)b << 8; | ||
126 | return LZMA2_STATE_UNPACK1; | ||
127 | |||
128 | case LZMA2_STATE_UNPACK1: | ||
129 | p->unpackSize |= (UInt32)b; | ||
130 | p->unpackSize++; | ||
131 | PRF(printf(" %7u", (unsigned)p->unpackSize)); | ||
132 | return LZMA2_IS_UNCOMPRESSED_STATE(p) ? LZMA2_STATE_DATA : LZMA2_STATE_PACK0; | ||
133 | |||
134 | case LZMA2_STATE_PACK0: | ||
135 | p->packSize = (UInt32)b << 8; | ||
136 | return LZMA2_STATE_PACK1; | ||
137 | |||
138 | case LZMA2_STATE_PACK1: | ||
139 | p->packSize |= (UInt32)b; | ||
140 | p->packSize++; | ||
141 | // if (p->packSize < 5) return LZMA2_STATE_ERROR; | ||
142 | PRF(printf(" %5u", (unsigned)p->packSize)); | ||
143 | return (p->control & 0x40) ? LZMA2_STATE_PROP : LZMA2_STATE_DATA; | ||
144 | |||
145 | case LZMA2_STATE_PROP: | ||
146 | { | ||
147 | unsigned lc, lp; | ||
148 | if (b >= (9 * 5 * 5)) | ||
149 | return LZMA2_STATE_ERROR; | ||
150 | lc = b % 9; | ||
151 | b /= 9; | ||
152 | p->decoder.prop.pb = (Byte)(b / 5); | ||
153 | lp = b % 5; | ||
154 | if (lc + lp > LZMA2_LCLP_MAX) | ||
155 | return LZMA2_STATE_ERROR; | ||
156 | p->decoder.prop.lc = (Byte)lc; | ||
157 | p->decoder.prop.lp = (Byte)lp; | ||
158 | return LZMA2_STATE_DATA; | ||
159 | } | ||
160 | } | ||
161 | return LZMA2_STATE_ERROR; | ||
162 | } | ||
163 | |||
164 | static void LzmaDec_UpdateWithUncompressed(CLzmaDec *p, const Byte *src, SizeT size) | ||
165 | { | ||
166 | memcpy(p->dic + p->dicPos, src, size); | ||
167 | p->dicPos += size; | ||
168 | if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= size) | ||
169 | p->checkDicSize = p->prop.dicSize; | ||
170 | p->processedPos += (UInt32)size; | ||
171 | } | ||
172 | |||
173 | void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState); | ||
174 | |||
175 | |||
176 | SRes Lzma2Dec_DecodeToDic(CLzma2Dec *p, SizeT dicLimit, | ||
177 | const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) | ||
178 | { | ||
179 | SizeT inSize = *srcLen; | ||
180 | *srcLen = 0; | ||
181 | *status = LZMA_STATUS_NOT_SPECIFIED; | ||
182 | |||
183 | while (p->state != LZMA2_STATE_ERROR) | ||
184 | { | ||
185 | SizeT dicPos; | ||
186 | |||
187 | if (p->state == LZMA2_STATE_FINISHED) | ||
188 | { | ||
189 | *status = LZMA_STATUS_FINISHED_WITH_MARK; | ||
190 | return SZ_OK; | ||
191 | } | ||
192 | |||
193 | dicPos = p->decoder.dicPos; | ||
194 | |||
195 | if (dicPos == dicLimit && finishMode == LZMA_FINISH_ANY) | ||
196 | { | ||
197 | *status = LZMA_STATUS_NOT_FINISHED; | ||
198 | return SZ_OK; | ||
199 | } | ||
200 | |||
201 | if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT) | ||
202 | { | ||
203 | if (*srcLen == inSize) | ||
204 | { | ||
205 | *status = LZMA_STATUS_NEEDS_MORE_INPUT; | ||
206 | return SZ_OK; | ||
207 | } | ||
208 | (*srcLen)++; | ||
209 | p->state = Lzma2Dec_UpdateState(p, *src++); | ||
210 | if (dicPos == dicLimit && p->state != LZMA2_STATE_FINISHED) | ||
211 | break; | ||
212 | continue; | ||
213 | } | ||
214 | |||
215 | { | ||
216 | SizeT inCur = inSize - *srcLen; | ||
217 | SizeT outCur = dicLimit - dicPos; | ||
218 | ELzmaFinishMode curFinishMode = LZMA_FINISH_ANY; | ||
219 | |||
220 | if (outCur >= p->unpackSize) | ||
221 | { | ||
222 | outCur = (SizeT)p->unpackSize; | ||
223 | curFinishMode = LZMA_FINISH_END; | ||
224 | } | ||
225 | |||
226 | if (LZMA2_IS_UNCOMPRESSED_STATE(p)) | ||
227 | { | ||
228 | if (inCur == 0) | ||
229 | { | ||
230 | *status = LZMA_STATUS_NEEDS_MORE_INPUT; | ||
231 | return SZ_OK; | ||
232 | } | ||
233 | |||
234 | if (p->state == LZMA2_STATE_DATA) | ||
235 | { | ||
236 | BoolInt initDic = (p->control == LZMA2_CONTROL_COPY_RESET_DIC); | ||
237 | LzmaDec_InitDicAndState(&p->decoder, initDic, False); | ||
238 | } | ||
239 | |||
240 | if (inCur > outCur) | ||
241 | inCur = outCur; | ||
242 | if (inCur == 0) | ||
243 | break; | ||
244 | |||
245 | LzmaDec_UpdateWithUncompressed(&p->decoder, src, inCur); | ||
246 | |||
247 | src += inCur; | ||
248 | *srcLen += inCur; | ||
249 | p->unpackSize -= (UInt32)inCur; | ||
250 | p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT; | ||
251 | } | ||
252 | else | ||
253 | { | ||
254 | SRes res; | ||
255 | |||
256 | if (p->state == LZMA2_STATE_DATA) | ||
257 | { | ||
258 | BoolInt initDic = (p->control >= 0xE0); | ||
259 | BoolInt initState = (p->control >= 0xA0); | ||
260 | LzmaDec_InitDicAndState(&p->decoder, initDic, initState); | ||
261 | p->state = LZMA2_STATE_DATA_CONT; | ||
262 | } | ||
263 | |||
264 | if (inCur > p->packSize) | ||
265 | inCur = (SizeT)p->packSize; | ||
266 | |||
267 | res = LzmaDec_DecodeToDic(&p->decoder, dicPos + outCur, src, &inCur, curFinishMode, status); | ||
268 | |||
269 | src += inCur; | ||
270 | *srcLen += inCur; | ||
271 | p->packSize -= (UInt32)inCur; | ||
272 | outCur = p->decoder.dicPos - dicPos; | ||
273 | p->unpackSize -= (UInt32)outCur; | ||
274 | |||
275 | if (res != 0) | ||
276 | break; | ||
277 | |||
278 | if (*status == LZMA_STATUS_NEEDS_MORE_INPUT) | ||
279 | { | ||
280 | if (p->packSize == 0) | ||
281 | break; | ||
282 | return SZ_OK; | ||
283 | } | ||
284 | |||
285 | if (inCur == 0 && outCur == 0) | ||
286 | { | ||
287 | if (*status != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK | ||
288 | || p->unpackSize != 0 | ||
289 | || p->packSize != 0) | ||
290 | break; | ||
291 | p->state = LZMA2_STATE_CONTROL; | ||
292 | } | ||
293 | |||
294 | *status = LZMA_STATUS_NOT_SPECIFIED; | ||
295 | } | ||
296 | } | ||
297 | } | ||
298 | |||
299 | *status = LZMA_STATUS_NOT_SPECIFIED; | ||
300 | p->state = LZMA2_STATE_ERROR; | ||
301 | return SZ_ERROR_DATA; | ||
302 | } | ||
303 | |||
304 | |||
305 | |||
306 | |||
307 | ELzma2ParseStatus Lzma2Dec_Parse(CLzma2Dec *p, | ||
308 | SizeT outSize, | ||
309 | const Byte *src, SizeT *srcLen, | ||
310 | int checkFinishBlock) | ||
311 | { | ||
312 | SizeT inSize = *srcLen; | ||
313 | *srcLen = 0; | ||
314 | |||
315 | while (p->state != LZMA2_STATE_ERROR) | ||
316 | { | ||
317 | if (p->state == LZMA2_STATE_FINISHED) | ||
318 | return (ELzma2ParseStatus)LZMA_STATUS_FINISHED_WITH_MARK; | ||
319 | |||
320 | if (outSize == 0 && !checkFinishBlock) | ||
321 | return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED; | ||
322 | |||
323 | if (p->state != LZMA2_STATE_DATA && p->state != LZMA2_STATE_DATA_CONT) | ||
324 | { | ||
325 | if (*srcLen == inSize) | ||
326 | return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT; | ||
327 | (*srcLen)++; | ||
328 | |||
329 | p->state = Lzma2Dec_UpdateState(p, *src++); | ||
330 | |||
331 | if (p->state == LZMA2_STATE_UNPACK0) | ||
332 | { | ||
333 | // if (p->decoder.dicPos != 0) | ||
334 | if (p->control == LZMA2_CONTROL_COPY_RESET_DIC || p->control >= 0xE0) | ||
335 | return LZMA2_PARSE_STATUS_NEW_BLOCK; | ||
336 | // if (outSize == 0) return LZMA_STATUS_NOT_FINISHED; | ||
337 | } | ||
338 | |||
339 | // The following code can be commented. | ||
340 | // It's not big problem, if we read additional input bytes. | ||
341 | // It will be stopped later in LZMA2_STATE_DATA / LZMA2_STATE_DATA_CONT state. | ||
342 | |||
343 | if (outSize == 0 && p->state != LZMA2_STATE_FINISHED) | ||
344 | { | ||
345 | // checkFinishBlock is true. So we expect that block must be finished, | ||
346 | // We can return LZMA_STATUS_NOT_SPECIFIED or LZMA_STATUS_NOT_FINISHED here | ||
347 | // break; | ||
348 | return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED; | ||
349 | } | ||
350 | |||
351 | if (p->state == LZMA2_STATE_DATA) | ||
352 | return LZMA2_PARSE_STATUS_NEW_CHUNK; | ||
353 | |||
354 | continue; | ||
355 | } | ||
356 | |||
357 | if (outSize == 0) | ||
358 | return (ELzma2ParseStatus)LZMA_STATUS_NOT_FINISHED; | ||
359 | |||
360 | { | ||
361 | SizeT inCur = inSize - *srcLen; | ||
362 | |||
363 | if (LZMA2_IS_UNCOMPRESSED_STATE(p)) | ||
364 | { | ||
365 | if (inCur == 0) | ||
366 | return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT; | ||
367 | if (inCur > p->unpackSize) | ||
368 | inCur = p->unpackSize; | ||
369 | if (inCur > outSize) | ||
370 | inCur = outSize; | ||
371 | p->decoder.dicPos += inCur; | ||
372 | src += inCur; | ||
373 | *srcLen += inCur; | ||
374 | outSize -= inCur; | ||
375 | p->unpackSize -= (UInt32)inCur; | ||
376 | p->state = (p->unpackSize == 0) ? LZMA2_STATE_CONTROL : LZMA2_STATE_DATA_CONT; | ||
377 | } | ||
378 | else | ||
379 | { | ||
380 | p->isExtraMode = True; | ||
381 | |||
382 | if (inCur == 0) | ||
383 | { | ||
384 | if (p->packSize != 0) | ||
385 | return (ELzma2ParseStatus)LZMA_STATUS_NEEDS_MORE_INPUT; | ||
386 | } | ||
387 | else if (p->state == LZMA2_STATE_DATA) | ||
388 | { | ||
389 | p->state = LZMA2_STATE_DATA_CONT; | ||
390 | if (*src != 0) | ||
391 | { | ||
392 | // first byte of lzma chunk must be Zero | ||
393 | *srcLen += 1; | ||
394 | p->packSize--; | ||
395 | break; | ||
396 | } | ||
397 | } | ||
398 | |||
399 | if (inCur > p->packSize) | ||
400 | inCur = (SizeT)p->packSize; | ||
401 | |||
402 | src += inCur; | ||
403 | *srcLen += inCur; | ||
404 | p->packSize -= (UInt32)inCur; | ||
405 | |||
406 | if (p->packSize == 0) | ||
407 | { | ||
408 | SizeT rem = outSize; | ||
409 | if (rem > p->unpackSize) | ||
410 | rem = p->unpackSize; | ||
411 | p->decoder.dicPos += rem; | ||
412 | p->unpackSize -= (UInt32)rem; | ||
413 | outSize -= rem; | ||
414 | if (p->unpackSize == 0) | ||
415 | p->state = LZMA2_STATE_CONTROL; | ||
416 | } | ||
417 | } | ||
418 | } | ||
419 | } | ||
420 | |||
421 | p->state = LZMA2_STATE_ERROR; | ||
422 | return (ELzma2ParseStatus)LZMA_STATUS_NOT_SPECIFIED; | ||
423 | } | ||
424 | |||
425 | |||
426 | |||
427 | |||
428 | SRes Lzma2Dec_DecodeToBuf(CLzma2Dec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) | ||
429 | { | ||
430 | SizeT outSize = *destLen, inSize = *srcLen; | ||
431 | *srcLen = *destLen = 0; | ||
432 | |||
433 | for (;;) | ||
434 | { | ||
435 | SizeT inCur = inSize, outCur, dicPos; | ||
436 | ELzmaFinishMode curFinishMode; | ||
437 | SRes res; | ||
438 | |||
439 | if (p->decoder.dicPos == p->decoder.dicBufSize) | ||
440 | p->decoder.dicPos = 0; | ||
441 | dicPos = p->decoder.dicPos; | ||
442 | curFinishMode = LZMA_FINISH_ANY; | ||
443 | outCur = p->decoder.dicBufSize - dicPos; | ||
444 | |||
445 | if (outCur >= outSize) | ||
446 | { | ||
447 | outCur = outSize; | ||
448 | curFinishMode = finishMode; | ||
449 | } | ||
450 | |||
451 | res = Lzma2Dec_DecodeToDic(p, dicPos + outCur, src, &inCur, curFinishMode, status); | ||
452 | |||
453 | src += inCur; | ||
454 | inSize -= inCur; | ||
455 | *srcLen += inCur; | ||
456 | outCur = p->decoder.dicPos - dicPos; | ||
457 | memcpy(dest, p->decoder.dic + dicPos, outCur); | ||
458 | dest += outCur; | ||
459 | outSize -= outCur; | ||
460 | *destLen += outCur; | ||
461 | if (res != 0) | ||
462 | return res; | ||
463 | if (outCur == 0 || outSize == 0) | ||
464 | return SZ_OK; | ||
465 | } | ||
466 | } | ||
467 | |||
468 | |||
469 | SRes Lzma2Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, | ||
470 | Byte prop, ELzmaFinishMode finishMode, ELzmaStatus *status, ISzAllocPtr alloc) | ||
471 | { | ||
472 | CLzma2Dec p; | ||
473 | SRes res; | ||
474 | SizeT outSize = *destLen, inSize = *srcLen; | ||
475 | *destLen = *srcLen = 0; | ||
476 | *status = LZMA_STATUS_NOT_SPECIFIED; | ||
477 | Lzma2Dec_Construct(&p); | ||
478 | RINOK(Lzma2Dec_AllocateProbs(&p, prop, alloc)); | ||
479 | p.decoder.dic = dest; | ||
480 | p.decoder.dicBufSize = outSize; | ||
481 | Lzma2Dec_Init(&p); | ||
482 | *srcLen = inSize; | ||
483 | res = Lzma2Dec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); | ||
484 | *destLen = p.decoder.dicPos; | ||
485 | if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) | ||
486 | res = SZ_ERROR_INPUT_EOF; | ||
487 | Lzma2Dec_FreeProbs(&p, alloc); | ||
488 | return res; | ||
489 | } | ||