diff options
Diffstat (limited to 'Asm/x86/LzmaDecOpt.asm')
-rw-r--r-- | Asm/x86/LzmaDecOpt.asm | 1303 |
1 files changed, 1303 insertions, 0 deletions
diff --git a/Asm/x86/LzmaDecOpt.asm b/Asm/x86/LzmaDecOpt.asm new file mode 100644 index 0000000..f2818e7 --- /dev/null +++ b/Asm/x86/LzmaDecOpt.asm | |||
@@ -0,0 +1,1303 @@ | |||
1 | ; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function | ||
2 | ; 2021-02-23: Igor Pavlov : Public domain | ||
3 | ; | ||
4 | ; 3 - is the code compatibility version of LzmaDec_DecodeReal_*() | ||
5 | ; function for check at link time. | ||
6 | ; That code is tightly coupled with LzmaDec_TryDummy() | ||
7 | ; and with other functions in the LzmaDec.c file. | ||
8 | ; CLzmaDec structure, (probs) array layout, input and output of | ||
9 | ; LzmaDec_DecodeReal_*() must be equal in both versions (C / ASM). | ||
10 | |||
; Guard for builds in which the x64 symbol is not defined. Both the fallback
; definition and the hard error inside it are commented out, so the guard
; currently has no effect.
11 | ifndef x64 | ||
12 | ; x64=1 | ||
13 | ; .err <x64_IS_REQUIRED> | ||
14 | endif | ||
15 | |||
; Shared definitions. MY_ASM_START, MY_PROC, REG_ABI_PARAM_* and the x*/r*
; register names used below are not defined in this file, so they presumably
; come from this include -- confirm against 7zAsm.asm.
16 | include 7zAsm.asm | ||
17 | |||
18 | MY_ASM_START | ||
19 | |||
; Dedicated code segment for the decoder, aligned to 64 bytes.
20 | _TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE' | ||
21 | |||
; Alignment helpers.
; MY_ALIGN num  - emits an `align num` directive at the point of use.
; MY_ALIGN_16 / MY_ALIGN_32 / MY_ALIGN_64 - fixed-size shorthands that
; forward to MY_ALIGN, so the alignment policy can be changed in one place.
22 | MY_ALIGN macro num:req | ||
23 | align num | ||
24 | endm | ||
25 | |||
26 | MY_ALIGN_16 macro | ||
27 | MY_ALIGN 16 | ||
28 | endm | ||
29 | |||
30 | MY_ALIGN_32 macro | ||
31 | MY_ALIGN 32 | ||
32 | endm | ||
33 | |||
34 | MY_ALIGN_64 macro | ||
35 | MY_ALIGN 64 | ||
36 | endm | ||
37 | |||
38 | |||
39 | ; _LZMA_SIZE_OPT equ 1 | ||
40 | |||
41 | ; _LZMA_PROB32 equ 1 | ||
42 | |||
; Probability element width selection.
;   _LZMA_PROB32 defined : 32-bit probabilities, PSHIFT = 2 (4 bytes each).
;   otherwise            : 16-bit probabilities, PSHIFT = 1 (2 bytes each).
; PLOAD dest, mem  - load one probability from [mem] into dest
;                    (zero-extended with movzx in the 16-bit mode).
; PSTORE src, mem  - store one probability from src to [mem].
43 | ifdef _LZMA_PROB32 | ||
44 | PSHIFT equ 2 | ||
45 | PLOAD macro dest, mem | ||
46 | mov dest, dword ptr [mem] | ||
47 | endm | ||
48 | PSTORE macro src, mem | ||
49 | mov dword ptr [mem], src | ||
50 | endm | ||
51 | else | ||
52 | PSHIFT equ 1 | ||
53 | PLOAD macro dest, mem | ||
54 | movzx dest, word ptr [mem] | ||
55 | endm | ||
56 | PSTORE macro src, mem | ||
; The word-sized store needs a word-sized register name, so the macro appends
; "_W" to the text of src. src must therefore be a name for which a "<src>_W"
; alias exists (for example probBranch -> probBranch_W, t0 -> t0_W).
57 | mov word ptr [mem], @CatStr(src, _W) | ||
58 | endm | ||
59 | endif | ||
60 | |||
; Byte scale factors derived from PSHIFT (the log2 size of one probability).
;   PMULT      = size of one probability in bytes.
;   PMULT_HALF = half of that; used as the scale when the index register has
;                already been doubled (see the "t1_R * PMULT_HALF" operands).
;   PMULT_2    = size of two probabilities.
61 | PMULT equ (1 SHL PSHIFT) | ||
62 | PMULT_HALF equ (1 SHL (PSHIFT - 1)) | ||
63 | PMULT_2 equ (1 SHL (PSHIFT + 1)) | ||
64 | |||
; Value 512. NOTE(review): its use is outside the portion of the file
; reviewed here; the name suggests an error marker for the match length.
65 | kMatchSpecLen_Error_Data equ (1 SHL 9) | ||
66 | |||
67 | ; x0 range | ||
68 | ; x1 pbPos / (prob) TREE | ||
69 | ; x2 probBranch / prm (MATCHED) / pbPos / cnt | ||
70 | ; x3 sym | ||
71 | ;====== r4 === RSP | ||
72 | ; x5 cod | ||
73 | ; x6 t1 NORM_CALC / probs_state / dist | ||
74 | ; x7 t0 NORM_CALC / prob2 IF_BIT_1 | ||
75 | ; x8 state | ||
76 | ; x9 match (MATCHED) / sym2 / dist2 / lpMask_reg | ||
77 | ; x10 kBitModelTotal_reg | ||
78 | ; r11 probs | ||
79 | ; x12 offs (MATCHED) / dic / len_temp | ||
80 | ; x13 processedPos | ||
81 | ; x14 bit (MATCHED) / dicPos | ||
82 | ; r15 buf | ||
83 | |||
84 | |||
; Symbolic names for the registers listed in the table above.
; Suffix convention as used in this file:
;   (none) - the x* view, used for 32-bit arithmetic,
;   _R     - the r* view of the same register, used for addresses/pointers,
;   _W     - used as the source of "word ptr" stores (see PSTORE),
;   _L     - used with "byte ptr" loads/stores.
; The x*/r*/x*_W/x*_L names themselves are not defined in this file.
;
; Several names deliberately share one physical register, so they must never
; be live at the same time:
;   x2 / r2   : probBranch, cnt, prm
;   x9 / r9   : lpMask_reg, match, sym2, dist2
;   x12 / r12 : offs, len_temp, dic
;   x14 / r14 : bit, dicPos
;   x7        : t0, prob2
;   x6        : t1, probs_state
;   x3        : sym, dist
85 | cod equ x5 | ||
86 | cod_L equ x5_L | ||
87 | range equ x0 | ||
88 | state equ x8 | ||
89 | state_R equ r8 | ||
90 | buf equ r15 | ||
91 | processedPos equ x13 | ||
92 | kBitModelTotal_reg equ x10 | ||
93 | |||
94 | probBranch equ x2 | ||
95 | probBranch_R equ r2 | ||
96 | probBranch_W equ x2_W | ||
97 | |||
98 | pbPos equ x1 | ||
99 | pbPos_R equ r1 | ||
100 | |||
101 | cnt equ x2 | ||
102 | cnt_R equ r2 | ||
103 | |||
104 | lpMask_reg equ x9 | ||
105 | dicPos equ r14 | ||
106 | |||
107 | sym equ x3 | ||
108 | sym_R equ r3 | ||
109 | sym_L equ x3_L | ||
110 | |||
111 | probs equ r11 | ||
112 | dic equ r12 | ||
113 | |||
114 | t0 equ x7 | ||
115 | t0_W equ x7_W | ||
116 | t0_R equ r7 | ||
117 | |||
; prob2 is t0 under another name: the branch macros use it to hold the
; pre-split range and then the probability delta.
118 | prob2 equ t0 | ||
119 | prob2_W equ t0_W | ||
120 | |||
121 | t1 equ x6 | ||
122 | t1_R equ r6 | ||
123 | |||
; probs_state is t1 under another name, so any macro that writes t1
; (NORM_CALC, PUP_COD, REV_*) destroys it.
124 | probs_state equ t1 | ||
125 | probs_state_R equ t1_R | ||
126 | |||
; Names used only while decoding a literal against a match byte.
127 | prm equ r2 | ||
128 | match equ x9 | ||
129 | match_R equ r9 | ||
130 | offs equ x12 | ||
131 | offs_R equ r12 | ||
132 | bit equ x14 | ||
133 | bit_R equ r14 | ||
134 | |||
135 | sym2 equ x9 | ||
136 | sym2_R equ r9 | ||
137 | |||
138 | len_temp equ x12 | ||
139 | |||
140 | dist equ sym | ||
141 | dist2 equ x9 | ||
142 | |||
143 | |||
144 | |||
; Range-coder constants.
;   kBitModelTotal  = 2048 : full scale of a probability value.
;   kNumMoveBits    = 5    : shift used when adapting a probability.
;   kBitModelOffset = 31   : starting value used by PUP_COD/BIT_0/REV_* for
;                            the "bit = 1" probability update.
;   kTopValue       = 2^24 : range is renormalised when it drops below this.
145 | kNumBitModelTotalBits equ 11 | ||
146 | kBitModelTotal equ (1 SHL kNumBitModelTotalBits) | ||
147 | kNumMoveBits equ 5 | ||
148 | kBitModelOffset equ ((1 SHL kNumMoveBits) - 1) | ||
149 | kTopValue equ (1 SHL 24) | ||
150 | |||
; NORM_2: unconditional renormalisation step.
;   cod   = (cod << 8) with the next input byte placed in cod_L,
;   range = range << 8,
;   buf   = buf + 1.
; After "shl cod, 8" the low byte of cod is zero, so writing the byte through
; cod_L replaces the separate load + "or" kept in the commented-out lines.
; Clobbers flags.
151 | NORM_2 macro | ||
152 | ; movzx t0, BYTE PTR [buf] | ||
153 | shl cod, 8 | ||
154 | mov cod_L, BYTE PTR [buf] | ||
155 | shl range, 8 | ||
156 | ; or cod, t0 | ||
157 | inc buf | ||
158 | endm | ||
159 | |||
160 | |||
; NORM: renormalise only when needed, i.e. run NORM_2 when range < kTopValue.
; Clobbers flags. The expansion defines an anonymous "@@" label, so an
; @F / @B reference in surrounding code must not span a NORM expansion.
161 | NORM macro | ||
162 | cmp range, kTopValue | ||
163 | jae SHORT @F | ||
164 | NORM_2 | ||
165 | @@: | ||
166 | endm | ||
167 | |||
168 | |||
169 | ; ---------- Branch MACROS ---------- | ||
170 | |||
; UPDATE_0: probability update for a decoded 0 bit (branch style).
;   probsArray, probOffset, probDisp - address parts; the probability lives at
;       [probOffset * 1 + probsArray + probDisp * PMULT].
; Expects probBranch to hold the current probability (as loaded by CMP_COD)
; and kBitModelTotal_reg to hold kBitModelTotal. Computes
;   probBranch += (kBitModelTotal - probBranch) >> kNumMoveBits
; and stores it back. range is left untouched: CMP_COD has already replaced
; it with the bound, which is the new range for a 0 bit.
; Clobbers prob2 (t0) and flags.
171 | UPDATE_0 macro probsArray:req, probOffset:req, probDisp:req | ||
172 | mov prob2, kBitModelTotal_reg | ||
173 | sub prob2, probBranch | ||
174 | shr prob2, kNumMoveBits | ||
175 | add probBranch, prob2 | ||
176 | PSTORE probBranch, probOffset * 1 + probsArray + probDisp * PMULT | ||
177 | endm | ||
178 | |||
179 | |||
; UPDATE_1: range/code/probability update for a decoded 1 bit (branch style).
;   probsArray, probOffset, probDisp - address parts; the probability lives at
;       [probOffset * 1 + probsArray + probDisp * PMULT].
; Expects the state left by CMP_COD: prob2 = range before the split,
; range = bound, probBranch = current probability. Computes
;   cod   -= bound
;   range  = old range - bound
;   prob  -= prob >> kNumMoveBits   (stored back through prob2)
; Clobbers probBranch, prob2 and flags.
180 | UPDATE_1 macro probsArray:req, probOffset:req, probDisp:req | ||
181 | sub prob2, range | ||
182 | sub cod, range | ||
183 | mov range, prob2 | ||
184 | mov prob2, probBranch | ||
185 | shr probBranch, kNumMoveBits | ||
186 | sub prob2, probBranch | ||
187 | PSTORE prob2, probOffset * 1 + probsArray + probDisp * PMULT | ||
188 | endm | ||
189 | |||
190 | |||
; CMP_COD: load a probability, renormalise, split the range and compare.
;   probsArray, probOffset, probDisp - address parts; the probability is read
;       from [probOffset * 1 + probsArray + probDisp * PMULT].
; On exit:
;   probBranch = probability,
;   prob2      = range before the split,
;   range      = bound = (range >> kNumBitModelTotalBits) * probability,
;   flags      = result of "cmp cod, bound": below (CF set) means the decoded
;                bit is 0, above-or-equal means it is 1.
; Neither cod nor the stored probability is modified here; UPDATE_0 or
; UPDATE_1 finishes the step.
191 | CMP_COD macro probsArray:req, probOffset:req, probDisp:req | ||
192 | PLOAD probBranch, probOffset * 1 + probsArray + probDisp * PMULT | ||
193 | NORM | ||
194 | mov prob2, range | ||
195 | shr range, kNumBitModelTotalBits | ||
196 | imul range, probBranch | ||
197 | cmp cod, range | ||
198 | endm | ||
199 | |||
200 | |||
; IF_BIT_1_NOUP: run CMP_COD and jump to toLabel when the bit is 1
; (cod >= bound). No update is performed on either path: the target must run
; UPDATE_1 and the fall-through path must run UPDATE_0.
201 | IF_BIT_1_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req | ||
202 | CMP_COD probsArray, probOffset, probDisp | ||
203 | jae toLabel | ||
204 | endm | ||
205 | |||
206 | |||
; IF_BIT_1: jump to toLabel when the bit is 1 (without any update; the
; target is responsible for UPDATE_1). On the fall-through path (bit is 0)
; the probability is updated immediately with UPDATE_0.
207 | IF_BIT_1 macro probsArray:req, probOffset:req, probDisp:req, toLabel:req | ||
208 | IF_BIT_1_NOUP probsArray, probOffset, probDisp, toLabel | ||
209 | UPDATE_0 probsArray, probOffset, probDisp | ||
210 | endm | ||
211 | |||
212 | |||
; IF_BIT_0_NOUP: run CMP_COD and jump to toLabel when the bit is 0
; (cod < bound). No update is performed on either path: the target must run
; UPDATE_0 and the fall-through path must run UPDATE_1.
213 | IF_BIT_0_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req | ||
214 | CMP_COD probsArray, probOffset, probDisp | ||
215 | jb toLabel | ||
216 | endm | ||
217 | |||
218 | |||
219 | ; ---------- CMOV MACROS ---------- | ||
220 | |||
; NORM_CALC: branch-free counterpart of CMP_COD.
;   prob - register holding the probability of the bit being decoded.
; On exit:
;   range = bound = (range >> kNumBitModelTotalBits) * prob   (value for bit 0)
;   t0    = old range - bound                                 (value for bit 1)
;   t1    = cod before the subtraction                        (value for bit 0)
;   cod   = cod - bound                                       (value for bit 1)
;   CF    = 1 when cod was below bound, i.e. the bit is 0.
; Callers pick the right values with cmovae / cmovb and must not execute a
; flag-modifying instruction before they have consumed CF.
; Overwrites t1, which is also probs_state.
221 | NORM_CALC macro prob:req | ||
222 | NORM | ||
223 | mov t0, range | ||
224 | shr range, kNumBitModelTotalBits | ||
225 | imul range, prob | ||
226 | sub t0, range | ||
227 | mov t1, cod | ||
228 | sub cod, range | ||
229 | endm | ||
230 | |||
231 | |||
; PUP: branch-free probability update and store.
;   prob    - register holding the current probability,
;   probPtr - address expression of that probability.
; Expects t0 to have been preset by the caller to the update target:
;   kBitModelTotal  for a 0 bit -> prob + ((kBitModelTotal - prob) >> 5)
;   kBitModelOffset for a 1 bit -> prob - (prob >> 5)
; In the second case (t0 - prob) is not positive, so the shift has to be
; arithmetic; with the offset of 31 the floor division by 32 yields exactly
; -(prob >> 5).
; Clobbers t0 and flags.
232 | PUP macro prob:req, probPtr:req | ||
233 | sub t0, prob | ||
234 | ; only sar works for both 16/32 bit prob modes | ||
235 | sar t0, kNumMoveBits | ||
236 | add t0, prob | ||
237 | PSTORE t0, probPtr | ||
238 | endm | ||
239 | |||
240 | |||
; PUP_SUB: fold the decoded bit into sym, then update the probability.
;   symSub - constant subtracted from sym together with the carry flag.
; "sbb" computes sym = sym - symSub - CF, where CF must still be the carry
; left by NORM_CALC (1 for a 0 bit). With symSub = -1 this adds the decoded
; bit to sym. The rest is PUP.
241 | PUP_SUB macro prob:req, probPtr:req, symSub:req | ||
242 | sbb sym, symSub | ||
243 | PUP prob, probPtr | ||
244 | endm | ||
245 | |||
246 | |||
; PUP_COD: common tail after NORM_CALC for the bit-tree macros.
;   - restores cod from t1 when the bit is 0,
;   - copies sym into t1 so that probPtr may be written in terms of t1_R
;     (probPtr is substituted textually and is only evaluated by the store
;     at the end, after this copy),
;   - presets t0 to kBitModelOffset (bit 1) or kBitModelTotal (bit 0),
;   - runs PUP_SUB to fold the bit into sym and store the new probability.
; Only mov/cmov are used before the sbb in PUP_SUB, so CF from NORM_CALC
; stays intact.
247 | PUP_COD macro prob:req, probPtr:req, symSub:req | ||
248 | mov t0, kBitModelOffset | ||
249 | cmovb cod, t1 | ||
250 | mov t1, sym | ||
251 | cmovb t0, kBitModelTotal_reg | ||
252 | PUP_SUB prob, probPtr, symSub | ||
253 | endm | ||
254 | |||
255 | |||
; BIT_0: decode the first bit of a bit-tree rooted at probs.
;   prob     - register that receives the probability of tree node 1,
;   probNext - register that receives the probability of the node selected
;              by the decoded bit (node 2 for bit 0, node 3 for bit 1).
; The probability of node 2 is loaded speculatively before the bit is known
; and replaced with that of node 3 by cmovae when the bit turns out to be 1.
; On exit sym = 2 + bit and the probability of node 1 has been updated.
256 | BIT_0 macro prob:req, probNext:req | ||
257 | PLOAD prob, probs + 1 * PMULT | ||
258 | PLOAD probNext, probs + 1 * PMULT_2 | ||
259 | |||
260 | NORM_CALC prob | ||
261 | |||
262 | cmovae range, t0 | ||
263 | PLOAD t0, probs + 1 * PMULT_2 + PMULT | ||
264 | cmovae probNext, t0 | ||
265 | mov t0, kBitModelOffset | ||
266 | cmovb cod, t1 | ||
267 | cmovb t0, kBitModelTotal_reg | ||
; mov does not touch flags, so CF from NORM_CALC is still valid for the sbb
; inside PUP_SUB: sym = 2 - (-1) - CF.
268 | mov sym, 2 | ||
269 | PUP_SUB prob, probs + 1 * PMULT, 0 - 1 | ||
270 | endm | ||
271 | |||
272 | |||
; BIT_1: decode one inner bit of a bit-tree rooted at probs.
;   prob     - register already holding the probability of node sym,
;   probNext - register that receives the probability of the child node
;              selected by the decoded bit (2*sym or 2*sym + 1).
; sym is doubled before the bit is known; the probability of node 2*sym is
; preloaded and replaced with that of node 2*sym + 1 by cmovae for a 1 bit.
; PUP_COD saves the doubled sym in t1, which is why the address of the
; probability being updated is written as probs + t1_R * PMULT_HALF
; (equal to probs + old sym * PMULT).
; On exit sym = 2 * old sym + bit.
273 | BIT_1 macro prob:req, probNext:req | ||
274 | PLOAD probNext, probs + sym_R * PMULT_2 | ||
275 | add sym, sym | ||
276 | |||
277 | NORM_CALC prob | ||
278 | |||
279 | cmovae range, t0 | ||
280 | PLOAD t0, probs + sym_R * PMULT + PMULT | ||
281 | cmovae probNext, t0 | ||
282 | PUP_COD prob, probs + t1_R * PMULT_HALF, 0 - 1 | ||
283 | endm | ||
284 | |||
285 | |||
; BIT_2: decode the last bit of a bit-tree rooted at probs.
;   prob   - register already holding the probability of node sym,
;   symSub - constant passed through to the sbb in PUP_SUB.
; No child probability is loaded. On exit
;   sym = 2 * old sym - symSub - CF,
; so the caller chooses symSub to add the decoded bit and, at the same time,
; remove the tree's leading marker bit or apply a base offset.
286 | BIT_2 macro prob:req, symSub:req | ||
287 | add sym, sym | ||
288 | |||
289 | NORM_CALC prob | ||
290 | |||
291 | cmovae range, t0 | ||
292 | PUP_COD prob, probs + t1_R * PMULT_HALF, symSub | ||
293 | endm | ||
294 | |||
295 | |||
296 | ; ---------- MATCHED LITERAL ---------- | ||
297 | |||
; LITM_0: first bit of a literal decoded against a match byte.
; Expects match to hold the match byte and probs to point at the literal
; probability sub-table.
;   offs  = 256 * PMULT (byte offset of the first "matched" table half),
;   match is shifted left by PSHIFT + 1 so that its top bit lines up with the
;         single bit set in offs,
;   bit   = offs AND match (byte offset contributed by the match bit),
;   prm   = address of the probability being decoded.
; After the decode offs keeps its value only if the decoded bit equals the
; match bit (xor clears it, cmovae restores it for a 1 bit), bit is reloaded
; with the shifted match for the next step, and sym = 2 + decoded bit.
; Uses x1 for the probability; clobbers t0, t1, prm, bit.
298 | LITM_0 macro | ||
299 | mov offs, 256 * PMULT | ||
300 | shl match, (PSHIFT + 1) | ||
301 | mov bit, offs | ||
302 | and bit, match | ||
303 | PLOAD x1, probs + 256 * PMULT + bit_R * 1 + 1 * PMULT | ||
304 | lea prm, [probs + 256 * PMULT + bit_R * 1 + 1 * PMULT] | ||
305 | ; lea prm, [probs + 256 * PMULT + 1 * PMULT] | ||
306 | ; add prm, bit_R | ||
307 | xor offs, bit | ||
308 | add match, match | ||
309 | |||
310 | NORM_CALC x1 | ||
311 | |||
312 | cmovae offs, bit | ||
313 | mov bit, match | ||
314 | cmovae range, t0 | ||
315 | mov t0, kBitModelOffset | ||
316 | cmovb cod, t1 | ||
317 | cmovb t0, kBitModelTotal_reg | ||
; "mov sym, 0" is used rather than xor because it must not disturb CF, which
; the sbb in PUP_SUB consumes: sym = 0 - (-3) - CF = 2 + decoded bit.
318 | mov sym, 0 | ||
319 | PUP_SUB x1, prm, -2-1 | ||
320 | endm | ||
321 | |||
322 | |||
; LITM: one inner bit of a literal decoded against a match byte.
; Expects offs, bit, match and sym as left by LITM_0 or a previous LITM.
;   bit = bit AND offs        (match-bit offset, zero once offs is zero),
;   prm = probs + offs + bit  (base of the table half in use),
;   x1  = probability at prm + sym * PMULT.
; sym and match are doubled before the bit is known. After the decode offs
; is kept only if the decoded bit equals the match bit, bit is reloaded with
; the shifted match, and PUP_COD sets sym = 2 * old sym + decoded bit and
; stores the updated probability (addressed through the doubled sym saved
; in t1, hence the PMULT_HALF scale).
323 | LITM macro | ||
324 | and bit, offs | ||
325 | lea prm, [probs + offs_R * 1] | ||
326 | add prm, bit_R | ||
327 | PLOAD x1, prm + sym_R * PMULT | ||
328 | xor offs, bit | ||
329 | add sym, sym | ||
330 | add match, match | ||
331 | |||
332 | NORM_CALC x1 | ||
333 | |||
334 | cmovae offs, bit | ||
335 | mov bit, match | ||
336 | cmovae range, t0 | ||
337 | PUP_COD x1, prm + t1_R * PMULT_HALF, - 1 | ||
338 | endm | ||
339 | |||
340 | |||
; LITM_2: last bit of a literal decoded against a match byte.
; Same addressing as LITM, but offs, bit and match are not advanced because
; no further bit follows. The symSub of 256 - 1 makes the final sbb produce
; sym = 2 * old sym + decoded bit - 256.
341 | LITM_2 macro | ||
342 | and bit, offs | ||
343 | lea prm, [probs + offs_R * 1] | ||
344 | add prm, bit_R | ||
345 | PLOAD x1, prm + sym_R * PMULT | ||
346 | add sym, sym | ||
347 | |||
348 | NORM_CALC x1 | ||
349 | |||
350 | cmovae range, t0 | ||
351 | PUP_COD x1, prm + t1_R * PMULT_HALF, 256 - 1 | ||
352 | endm | ||
353 | |||
354 | |||
355 | ; ---------- REVERSE BITS ---------- | ||
356 | |||
; REV_0: first bit of a reverse bit-tree rooted at probs.
;   prob     - register already holding the probability at probs + 1 * PMULT,
;   probNext - register that receives the probability for the next step.
; Expects sym2_R to hold the address of the candidate for a 0 bit (the
; commented-out lines show probs + 2 * PMULT); the caller sets both up.
; For a 1 bit, probNext and sym2_R are switched to probs + 3 * PMULT.
; The updated probability is stored at probs + 1 * PMULT. sym is not changed.
; Clobbers t0, t1.
357 | REV_0 macro prob:req, probNext:req | ||
358 | ; PLOAD prob, probs + 1 * PMULT | ||
359 | ; lea sym2_R, [probs + 2 * PMULT] | ||
360 | ; PLOAD probNext, probs + 2 * PMULT | ||
361 | PLOAD probNext, sym2_R | ||
362 | |||
363 | NORM_CALC prob | ||
364 | |||
365 | cmovae range, t0 | ||
366 | PLOAD t0, probs + 3 * PMULT | ||
367 | cmovae probNext, t0 | ||
368 | cmovb cod, t1 | ||
369 | mov t0, kBitModelOffset | ||
370 | cmovb t0, kBitModelTotal_reg | ||
371 | lea t1_R, [probs + 3 * PMULT] | ||
372 | cmovae sym2_R, t1_R | ||
373 | PUP prob, probs + 1 * PMULT | ||
374 | endm | ||
375 | |||
376 | |||
; REV_1: inner bit of a reverse bit-tree.
;   prob     - register already holding the probability at address sym2_R,
;   probNext - register that receives the probability for the next step,
;   step     - distance, in probabilities, to the next tree level.
; sym2_R is advanced by step * PMULT (candidate for a 0 bit); for a 1 bit it
; is advanced by another step * PMULT and probNext is reloaded from there.
; t1_R always holds (old sym2_R + 2 * step * PMULT), so the probability that
; was just decoded is addressed as t1_R - step * PMULT_2 for the store.
; Clobbers t0, t1.
377 | REV_1 macro prob:req, probNext:req, step:req | ||
378 | add sym2_R, step * PMULT | ||
379 | PLOAD probNext, sym2_R | ||
380 | |||
381 | NORM_CALC prob | ||
382 | |||
383 | cmovae range, t0 | ||
384 | PLOAD t0, sym2_R + step * PMULT | ||
385 | cmovae probNext, t0 | ||
386 | cmovb cod, t1 | ||
387 | mov t0, kBitModelOffset | ||
388 | cmovb t0, kBitModelTotal_reg | ||
389 | lea t1_R, [sym2_R + step * PMULT] | ||
390 | cmovae sym2_R, t1_R | ||
391 | PUP prob, t1_R - step * PMULT_2 | ||
392 | endm | ||
393 | |||
394 | |||
; REV_2: last bit of a reverse bit-tree.
;   prob - register already holding the probability at address sym2_R,
;   step - value removed from sym when the decoded bit is 0.
; sym2_R is first converted from an address back to a probability index
; (subtract probs, shift right by PSHIFT) and OR-ed into sym. After the
; decode sym keeps that value for a 1 bit and becomes sym - step for a
; 0 bit. The updated probability is stored at probs + index * PMULT.
; Clobbers t0, t1.
395 | REV_2 macro prob:req, step:req | ||
396 | sub sym2_R, probs | ||
397 | shr sym2, PSHIFT | ||
398 | or sym, sym2 | ||
399 | |||
400 | NORM_CALC prob | ||
401 | |||
402 | cmovae range, t0 | ||
403 | lea t0, [sym - step] | ||
404 | cmovb sym, t0 | ||
405 | cmovb cod, t1 | ||
406 | mov t0, kBitModelOffset | ||
407 | cmovb t0, kBitModelTotal_reg | ||
408 | PUP prob, probs + sym2_R * PMULT | ||
409 | endm | ||
410 | |||
411 | |||
; REV_1_VAR: one step of a reverse bit-tree whose depth is not fixed.
;   prob - register that receives the probability read from address sym_R.
; Here sym_R is used as the address of the current probability and sym2_R as
; the byte step to the next level:
;   sym_R += sym2_R for a 0 bit, sym_R += 2 * sym2_R for a 1 bit,
;   sym2 is doubled for the next level.
; The probs register is overwritten with the address of the probability
; being decoded (used for the final store), so the caller has to reload
; probs afterwards.
; "add sym2, sym2" changes flags; it is placed after the last cmov on purpose.
; Clobbers t0, t1.
412 | REV_1_VAR macro prob:req | ||
413 | PLOAD prob, sym_R | ||
414 | mov probs, sym_R | ||
415 | add sym_R, sym2_R | ||
416 | |||
417 | NORM_CALC prob | ||
418 | |||
419 | cmovae range, t0 | ||
420 | lea t0_R, [sym_R + 1 * sym2_R] | ||
421 | cmovae sym_R, t0_R | ||
422 | mov t0, kBitModelOffset | ||
423 | cmovb cod, t1 | ||
424 | ; mov t1, kBitModelTotal | ||
425 | ; cmovb t0, t1 | ||
426 | cmovb t0, kBitModelTotal_reg | ||
427 | add sym2, sym2 | ||
428 | PUP prob, probs | ||
429 | endm | ||
430 | |||
431 | |||
432 | |||
433 | |||
; LIT_PROBS: finish the "this is a literal" decision and point probs at the
; probability sub-table for the literal about to be decoded.
;   lpMaskParam - literal position mask (register or memory operand).
; On entry sym is combined with processedPos << 8; per the formula below it
; is expected to hold the previous dictionary byte.
; Steps:
;   sym   = ((processedPos << 8) + sym) AND lpMaskParam
;   sym   = (sym * 3) << lc2      (LOC lc2 is stored as lc + PSHIFT by the
;                                  function prologue, so this is a byte offset)
;   probs = probs + Literal * PMULT + sym
;   IsMatch probability gets its 0-bit update (UPDATE_0), completing the
;   compare done earlier by IF_BIT_0_NOUP / IF_BIT_1_NOUP
;   processedPos is incremented.
; pbPos_R is folded into probs_state_R first because x1 (the pbPos register)
; is reused for the shift count. probs no longer points at the base table
; afterwards. Clobbers t0, x1, probBranch.
434 | LIT_PROBS macro lpMaskParam:req | ||
435 | ; prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc); | ||
436 | mov t0, processedPos | ||
437 | shl t0, 8 | ||
438 | add sym, t0 | ||
439 | and sym, lpMaskParam | ||
440 | add probs_state_R, pbPos_R | ||
441 | mov x1, LOC lc2 | ||
442 | lea sym, dword ptr[sym_R + 2 * sym_R] | ||
443 | add probs, Literal * PMULT | ||
444 | shl sym, x1_L | ||
445 | add probs, sym_R | ||
446 | UPDATE_0 probs_state_R, 0, IsMatch | ||
447 | inc processedPos | ||
448 | endm | ||
449 | |||
450 | |||
451 | |||
; LZMA model dimensions. Evaluated values are given for orientation.
; kNumPosStatesMax = 16.
452 | kNumPosBitsMax equ 4 | ||
453 | kNumPosStatesMax equ (1 SHL kNumPosBitsMax) | ||
454 | |||
; Length coder: kLenNumLowSymbols = 8, kLenNumHighSymbols = 256,
; kNumLenProbs = 2 * 8 * 16 + 256 = 512 probabilities per length coder.
455 | kLenNumLowBits equ 3 | ||
456 | kLenNumLowSymbols equ (1 SHL kLenNumLowBits) | ||
457 | kLenNumHighBits equ 8 | ||
458 | kLenNumHighSymbols equ (1 SHL kLenNumHighBits) | ||
459 | kNumLenProbs equ (2 * kLenNumLowSymbols * kNumPosStatesMax + kLenNumHighSymbols) | ||
460 | |||
; Offsets (in probabilities) inside one length coder:
; LenChoice = 0, LenChoice2 = 8, LenHigh = 256.
461 | LenLow equ 0 | ||
462 | LenChoice equ LenLow | ||
463 | LenChoice2 equ (LenLow + kLenNumLowSymbols) | ||
464 | LenHigh equ (LenLow + 2 * kLenNumLowSymbols * kNumPosStatesMax) | ||
465 | |||
; State machine sizes. kNumStates2 (16) is the value used to size the
; IsMatch and IsRep0Long tables in the layout that follows.
466 | kNumStates equ 12 | ||
467 | kNumStates2 equ 16 | ||
468 | kNumLitStates equ 7 | ||
469 | |||
; kNumFullDistances = 1 SHL 7 = 128.
470 | kStartPosModelIndex equ 4 | ||
471 | kEndPosModelIndex equ 14 | ||
472 | kNumFullDistances equ (1 SHL (kEndPosModelIndex SHR 1)) | ||
473 | |||
474 | kNumPosSlotBits equ 6 | ||
475 | kNumLenToPosStates equ 4 | ||
476 | |||
; kAlignTableSize = 16.
477 | kNumAlignBits equ 4 | ||
478 | kAlignTableSize equ (1 SHL kNumAlignBits) | ||
479 | |||
; kMatchSpecLenStart = 2 + 16 + 256 = 274.
480 | kMatchMinLen equ 2 | ||
481 | kMatchSpecLenStart equ (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols) | ||
482 | |||
; Layout of the probability array, as offsets in probabilities relative to
; the point where kAlign sits (offset 0). Tables before that point have
; negative offsets. Evaluated values:
;   SpecPos = -1664, IsRep0Long = -1536, RepLenCoder = -1280,
;   LenCoder = -768, IsMatch = -256, kAlign = 0, IsRep = 16, IsRepG0 = 28,
;   IsRepG1 = 40, IsRepG2 = 52, PosSlot = 64, Literal = 320,
;   NUM_BASE_PROBS = 1984.
; The probs register is therefore expected to point kStartOffset
; probabilities past the start of the array (the function loads it from the
; probs_1664 field).
483 | kStartOffset equ 1664 | ||
484 | SpecPos equ (-kStartOffset) | ||
485 | IsRep0Long equ (SpecPos + kNumFullDistances) | ||
486 | RepLenCoder equ (IsRep0Long + (kNumStates2 SHL kNumPosBitsMax)) | ||
487 | LenCoder equ (RepLenCoder + kNumLenProbs) | ||
488 | IsMatch equ (LenCoder + kNumLenProbs) | ||
489 | kAlign equ (IsMatch + (kNumStates2 SHL kNumPosBitsMax)) | ||
490 | IsRep equ (kAlign + kAlignTableSize) | ||
491 | IsRepG0 equ (IsRep + kNumStates) | ||
492 | IsRepG1 equ (IsRepG0 + kNumStates) | ||
493 | IsRepG2 equ (IsRepG1 + kNumStates) | ||
494 | PosSlot equ (IsRepG2 + kNumStates) | ||
495 | Literal equ (PosSlot + (kNumLenToPosStates SHL kNumPosSlotBits)) | ||
496 | NUM_BASE_PROBS equ (Literal + kStartOffset) | ||
497 | |||
; Build-time checks: fail the assembly if the layout above stops placing
; kAlign at offset 0 or changes the total number of base probabilities.
498 | if kAlign ne 0 | ||
499 | .err <Stop_Compiling_Bad_LZMA_kAlign> | ||
500 | endif | ||
501 | |||
502 | if NUM_BASE_PROBS ne 1984 | ||
503 | .err <Stop_Compiling_Bad_LZMA_PROBS> | ||
504 | endif | ||
505 | |||
506 | |||
; PTR_FIELD: declaration text for one pointer-sized (8-byte) struct field.
507 | PTR_FIELD equ dq ? | ||
508 | |||
; CLzmaDec_Asm: assembly view of the decoder object passed in by the caller.
; As stated in the file header, this layout has to stay identical to the
; CLzmaDec structure used by the C code.
; The first four byte fields plus dicSize occupy 8 bytes, followed by six
; pointer-sized fields and ten dword fields.
509 | CLzmaDec_Asm struct | ||
510 | lc db ? | ||
511 | lp db ? | ||
512 | pb db ? | ||
513 | _pad_ db ? | ||
514 | dicSize dd ? | ||
515 | |||
516 | probs_Spec PTR_FIELD | ||
; probs_1664 is the field the function actually loads into the probs
; register (in place of probs_Spec + kStartOffset, see the commented-out
; lines in the function prologue).
517 | probs_1664 PTR_FIELD | ||
518 | dic_Spec PTR_FIELD | ||
519 | dicBufSize PTR_FIELD | ||
520 | dicPos_Spec PTR_FIELD | ||
521 | buf_Spec PTR_FIELD | ||
522 | |||
523 | range_Spec dd ? | ||
524 | code_Spec dd ? | ||
525 | processedPos_Spec dd ? | ||
526 | checkDicSize dd ? | ||
527 | rep0 dd ? | ||
528 | rep1 dd ? | ||
529 | rep2 dd ? | ||
530 | rep3 dd ? | ||
531 | state_Spec dd ? | ||
532 | remainLen dd ? | ||
533 | CLzmaDec_Asm ends | ||
534 | |||
535 | |||
; CLzmaDec_Asm_Loc: the function's private stack frame, addressed through
; the LOC / LOC_0 equates. It caches values that do not fit in registers.
; The declared fields add up to 128 bytes (assuming the assembler inserts no
; padding); the function prologue rounds the frame address down with
; "and r0, -128".
536 | CLzmaDec_Asm_Loc struct | ||
; Caller's stack pointer, saved by the prologue before RSP is replaced.
537 | OLD_RSP PTR_FIELD | ||
; Pointer to the CLzmaDec_Asm object received as the first parameter.
538 | lzmaPtr PTR_FIELD | ||
539 | _pad0_ PTR_FIELD | ||
540 | _pad1_ PTR_FIELD | ||
541 | _pad2_ PTR_FIELD | ||
542 | dicBufSize PTR_FIELD | ||
543 | probs_Spec PTR_FIELD | ||
544 | dic_Spec PTR_FIELD | ||
545 | |||
546 | limit PTR_FIELD | ||
547 | bufLimit PTR_FIELD | ||
; lc2 is stored by the prologue as lc + PSHIFT.
548 | lc2 dd ? | ||
549 | lpMask dd ? | ||
550 | pbMask dd ? | ||
551 | checkDicSize dd ? | ||
552 | |||
553 | _pad_ dd ? | ||
554 | remainLen dd ? | ||
555 | dicPos_Spec PTR_FIELD | ||
556 | rep0 dd ? | ||
557 | rep1 dd ? | ||
558 | rep2 dd ? | ||
559 | rep3 dd ? | ||
560 | CLzmaDec_Asm_Loc ends | ||
561 | |||
562 | |||
; Field-access prefixes. Each is a text equate ending in "." so that a field
; name can follow it directly, e.g. "LOC rep0".
;   GLOB_2 - CLzmaDec_Asm field addressed through sym_R,
;   GLOB   - CLzmaDec_Asm field addressed through r1,
;   LOC_0  - CLzmaDec_Asm_Loc field addressed through r0,
;   LOC    - CLzmaDec_Asm_Loc field addressed through RSP.
; The base register must hold the matching pointer at the point of use.
563 | GLOB_2 equ [sym_R].CLzmaDec_Asm. | ||
564 | GLOB equ [r1].CLzmaDec_Asm. | ||
565 | LOC_0 equ [r0].CLzmaDec_Asm_Loc. | ||
566 | LOC equ [RSP].CLzmaDec_Asm_Loc. | ||
567 | |||
568 | |||
; COPY_VAR name: copy the field `name` from the decoder object (GLOB_2, base
; sym_R) into the same-named field of the local frame (LOC_0, base r0).
; The value travels through t0, so this is meant for fields that fit the
; t0 register view. Clobbers t0.
569 | COPY_VAR macro name | ||
570 | mov t0, GLOB_2 name | ||
571 | mov LOC_0 name, t0 | ||
572 | endm | ||
573 | |||
574 | |||
; RESTORE_VAR name: copy the field `name` from the local frame (LOC, base
; RSP) back into the same-named field of the decoder object (GLOB, base r1).
; The value travels through t0. Clobbers t0.
575 | RESTORE_VAR macro name | ||
576 | mov t0, LOC name | ||
577 | mov GLOB name, t0 | ||
578 | endm | ||
579 | |||
580 | |||
581 | |||
; IsMatchBranch_Pre: prepare the two address parts used to reach the
; IsMatch probability for the current position and state.
;   pbPos         = (processedPos AND LOC pbMask)
;                   << (kLenNumLowBits + 1 + PSHIFT)
;   probs_state_R = probs + state_R
; The function keeps state pre-scaled by PMULT ("shl state, PSHIFT"), so
; state_R is added as a byte offset. In terms of probabilities the code
; therefore forms (posState << 4) + state; this grouping differs from the
; one written in the formula comment inside the macro.
; The `reg` parameter is not used by the body. Clobbers pbPos (x1),
; probs_state (t1) and flags.
582 | IsMatchBranch_Pre macro reg | ||
583 | ; prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; | ||
584 | mov pbPos, LOC pbMask | ||
585 | and pbPos, processedPos | ||
586 | shl pbPos, (kLenNumLowBits + 1 + PSHIFT) | ||
587 | lea probs_state_R, [probs + 1 * state_R] | ||
588 | endm | ||
589 | |||
590 | |||
; IsMatchBranch: compute the IsMatch address parts and decode the IsMatch
; bit. Jumps to IsMatch_label when the bit is 1; falls through with the
; probability already updated when the bit is 0.
; The `reg` parameter is not used by the body.
591 | IsMatchBranch macro reg | ||
592 | IsMatchBranch_Pre | ||
593 | IF_BIT_1 probs_state_R, pbPos_R, IsMatch, IsMatch_label | ||
594 | endm | ||
595 | |||
596 | |||
; CheckLimits: leave the decoding loop through fin_OK when either limit has
; been reached, i.e. buf >= LOC bufLimit or dicPos >= LOC limit (unsigned
; compares). fin_OK is defined outside the part of the file shown here.
; The `reg` parameter is not used by the body. Clobbers flags.
597 | CheckLimits macro reg | ||
598 | cmp buf, LOC bufLimit | ||
599 | jae fin_OK | ||
600 | cmp dicPos, LOC limit | ||
601 | jae fin_OK | ||
602 | endm | ||
603 | |||
604 | |||
605 | |||
606 | ; RSP is (16x + 8) bytes aligned in WIN64-x64 | ||
607 | ; LocalSize equ ((((SIZEOF CLzmaDec_Asm_Loc) + 7) / 16 * 16) + 8) | ||
608 | |||
; Names for the three arguments of LzmaDec_DecodeReal_3, mapped onto the
; ABI parameter registers. REG_ABI_PARAM_0..2 are not defined in this file
; (presumably provided by 7zAsm.asm -- confirm).
;   PARAM_lzma     - pointer to the decoder object,
;   PARAM_limit    - dictionary limit; the function adds the dictionary base
;                    to it before storing it as LOC limit,
;   PARAM_bufLimit - input limit, stored as LOC bufLimit.
609 | PARAM_lzma equ REG_ABI_PARAM_0 | ||
610 | PARAM_limit equ REG_ABI_PARAM_1 | ||
611 | PARAM_bufLimit equ REG_ABI_PARAM_2 | ||
612 | |||
613 | ; MY_ALIGN_64 | ||
614 | MY_PROC LzmaDec_DecodeReal_3, 3 | ||
615 | MY_PUSH_PRESERVED_ABI_REGS | ||
616 | |||
617 | lea r0, [RSP - (SIZEOF CLzmaDec_Asm_Loc)] | ||
618 | and r0, -128 | ||
619 | mov r5, RSP | ||
620 | mov RSP, r0 | ||
621 | mov LOC_0 Old_RSP, r5 | ||
622 | mov LOC_0 lzmaPtr, PARAM_lzma | ||
623 | |||
624 | mov LOC_0 remainLen, 0 ; remainLen must be ZERO | ||
625 | |||
626 | mov LOC_0 bufLimit, PARAM_bufLimit | ||
627 | mov sym_R, PARAM_lzma ; CLzmaDec_Asm pointer for GLOB_2 | ||
628 | mov dic, GLOB_2 dic_Spec | ||
629 | add PARAM_limit, dic | ||
630 | mov LOC_0 limit, PARAM_limit | ||
631 | |||
632 | COPY_VAR(rep0) | ||
633 | COPY_VAR(rep1) | ||
634 | COPY_VAR(rep2) | ||
635 | COPY_VAR(rep3) | ||
636 | |||
637 | mov dicPos, GLOB_2 dicPos_Spec | ||
638 | add dicPos, dic | ||
639 | mov LOC_0 dicPos_Spec, dicPos | ||
640 | mov LOC_0 dic_Spec, dic | ||
641 | |||
642 | mov x1_L, GLOB_2 pb | ||
643 | mov t0, 1 | ||
644 | shl t0, x1_L | ||
645 | dec t0 | ||
646 | mov LOC_0 pbMask, t0 | ||
647 | |||
648 | ; unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; | ||
649 | ; unsigned lc = p->prop.lc; | ||
650 | ; unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc); | ||
651 | |||
652 | mov x1_L, GLOB_2 lc | ||
653 | mov x2, 100h | ||
654 | mov t0, x2 | ||
655 | shr x2, x1_L | ||
656 | ; inc x1 | ||
657 | add x1_L, PSHIFT | ||
658 | mov LOC_0 lc2, x1 | ||
659 | mov x1_L, GLOB_2 lp | ||
660 | shl t0, x1_L | ||
661 | sub t0, x2 | ||
662 | mov LOC_0 lpMask, t0 | ||
663 | mov lpMask_reg, t0 | ||
664 | |||
665 | ; mov probs, GLOB_2 probs_Spec | ||
666 | ; add probs, kStartOffset SHL PSHIFT | ||
667 | mov probs, GLOB_2 probs_1664 | ||
668 | mov LOC_0 probs_Spec, probs | ||
669 | |||
670 | mov t0_R, GLOB_2 dicBufSize | ||
671 | mov LOC_0 dicBufSize, t0_R | ||
672 | |||
673 | mov x1, GLOB_2 checkDicSize | ||
674 | mov LOC_0 checkDicSize, x1 | ||
675 | |||
676 | mov processedPos, GLOB_2 processedPos_Spec | ||
677 | |||
678 | mov state, GLOB_2 state_Spec | ||
679 | shl state, PSHIFT | ||
680 | |||
681 | mov buf, GLOB_2 buf_Spec | ||
682 | mov range, GLOB_2 range_Spec | ||
683 | mov cod, GLOB_2 code_Spec | ||
684 | mov kBitModelTotal_reg, kBitModelTotal | ||
685 | xor sym, sym | ||
686 | |||
687 | ; if (processedPos != 0 || checkDicSize != 0) | ||
688 | or x1, processedPos | ||
689 | jz @f | ||
690 | |||
691 | add t0_R, dic | ||
692 | cmp dicPos, dic | ||
693 | cmovnz t0_R, dicPos | ||
694 | movzx sym, byte ptr[t0_R - 1] | ||
695 | |||
696 | @@: | ||
697 | IsMatchBranch_Pre | ||
698 | cmp state, 4 * PMULT | ||
699 | jb lit_end | ||
700 | cmp state, kNumLitStates * PMULT | ||
701 | jb lit_matched_end | ||
702 | jmp lz_end | ||
703 | |||
704 | |||
705 | |||
706 | |||
707 | ; ---------- LITERAL ---------- | ||
708 | MY_ALIGN_64 | ||
709 | lit_start: | ||
710 | xor state, state | ||
711 | lit_start_2: | ||
712 | LIT_PROBS lpMask_reg | ||
713 | |||
714 | ifdef _LZMA_SIZE_OPT | ||
715 | |||
716 | PLOAD x1, probs + 1 * PMULT | ||
717 | mov sym, 1 | ||
718 | MY_ALIGN_16 | ||
719 | lit_loop: | ||
720 | BIT_1 x1, x2 | ||
721 | mov x1, x2 | ||
722 | cmp sym, 127 | ||
723 | jbe lit_loop | ||
724 | |||
725 | else | ||
726 | |||
727 | BIT_0 x1, x2 | ||
728 | BIT_1 x2, x1 | ||
729 | BIT_1 x1, x2 | ||
730 | BIT_1 x2, x1 | ||
731 | BIT_1 x1, x2 | ||
732 | BIT_1 x2, x1 | ||
733 | BIT_1 x1, x2 | ||
734 | |||
735 | endif | ||
736 | |||
737 | BIT_2 x2, 256 - 1 | ||
738 | |||
739 | ; mov dic, LOC dic_Spec | ||
740 | mov probs, LOC probs_Spec | ||
741 | IsMatchBranch_Pre | ||
742 | mov byte ptr[dicPos], sym_L | ||
743 | inc dicPos | ||
744 | |||
745 | CheckLimits | ||
746 | lit_end: | ||
747 | IF_BIT_0_NOUP probs_state_R, pbPos_R, IsMatch, lit_start | ||
748 | |||
749 | ; jmp IsMatch_label | ||
750 | |||
751 | ; ---------- MATCHES ---------- | ||
752 | ; MY_ALIGN_32 | ||
753 | IsMatch_label: | ||
754 | UPDATE_1 probs_state_R, pbPos_R, IsMatch | ||
755 | IF_BIT_1 probs_state_R, 0, IsRep, IsRep_label | ||
756 | |||
757 | add probs, LenCoder * PMULT | ||
758 | add state, kNumStates * PMULT | ||
759 | |||
760 | ; ---------- LEN DECODE ---------- | ||
761 | len_decode: | ||
762 | mov len_temp, 8 - 1 - kMatchMinLen | ||
763 | IF_BIT_0_NOUP probs, 0, 0, len_mid_0 | ||
764 | UPDATE_1 probs, 0, 0 | ||
765 | add probs, (1 SHL (kLenNumLowBits + PSHIFT)) | ||
766 | mov len_temp, -1 - kMatchMinLen | ||
767 | IF_BIT_0_NOUP probs, 0, 0, len_mid_0 | ||
768 | UPDATE_1 probs, 0, 0 | ||
769 | add probs, LenHigh * PMULT - (1 SHL (kLenNumLowBits + PSHIFT)) | ||
770 | mov sym, 1 | ||
771 | PLOAD x1, probs + 1 * PMULT | ||
772 | |||
773 | MY_ALIGN_32 | ||
774 | len8_loop: | ||
775 | BIT_1 x1, x2 | ||
776 | mov x1, x2 | ||
777 | cmp sym, 64 | ||
778 | jb len8_loop | ||
779 | |||
780 | mov len_temp, (kLenNumHighSymbols - kLenNumLowSymbols * 2) - 1 - kMatchMinLen | ||
781 | jmp short len_mid_2 ; "short" is explicit because MASM does not optimize this jump the way other assemblers do | ||
782 | |||
783 | MY_ALIGN_32 | ||
784 | len_mid_0: | ||
785 | UPDATE_0 probs, 0, 0 | ||
786 | add probs, pbPos_R | ||
787 | BIT_0 x2, x1 | ||
788 | len_mid_2: | ||
789 | BIT_1 x1, x2 | ||
790 | BIT_2 x2, len_temp | ||
791 | mov probs, LOC probs_Spec | ||
792 | cmp state, kNumStates * PMULT | ||
793 | jb copy_match | ||
794 | |||
795 | |||
796 | ; ---------- DECODE DISTANCE ---------- | ||
797 | ; probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); | ||
798 | |||
799 | mov t0, 3 + kMatchMinLen | ||
800 | cmp sym, 3 + kMatchMinLen | ||
801 | cmovb t0, sym | ||
802 | add probs, PosSlot * PMULT - (kMatchMinLen SHL (kNumPosSlotBits + PSHIFT)) | ||
803 | shl t0, (kNumPosSlotBits + PSHIFT) | ||
804 | add probs, t0_R | ||
805 | |||
806 | ; sym = Len | ||
807 | ; mov LOC remainLen, sym | ||
808 | mov len_temp, sym | ||
809 | |||
810 | ifdef _LZMA_SIZE_OPT | ||
811 | |||
812 | PLOAD x1, probs + 1 * PMULT | ||
813 | mov sym, 1 | ||
814 | MY_ALIGN_16 | ||
815 | slot_loop: | ||
816 | BIT_1 x1, x2 | ||
817 | mov x1, x2 | ||
818 | cmp sym, 32 | ||
819 | jb slot_loop | ||
820 | |||
821 | else | ||
822 | |||
823 | BIT_0 x1, x2 | ||
824 | BIT_1 x2, x1 | ||
825 | BIT_1 x1, x2 | ||
826 | BIT_1 x2, x1 | ||
827 | BIT_1 x1, x2 | ||
828 | |||
829 | endif | ||
830 | |||
831 | mov x1, sym | ||
832 | BIT_2 x2, 64-1 | ||
833 | |||
834 | and sym, 3 | ||
835 | mov probs, LOC probs_Spec | ||
836 | cmp x1, 32 + kEndPosModelIndex / 2 | ||
837 | jb short_dist | ||
838 | |||
839 | ; unsigned numDirectBits = (unsigned)(((distance >> 1) - 1)); | ||
840 | sub x1, (32 + 1 + kNumAlignBits) | ||
841 | ; distance = (2 | (distance & 1)); | ||
842 | or sym, 2 | ||
843 | PLOAD x2, probs + 1 * PMULT | ||
844 | shl sym, kNumAlignBits + 1 | ||
845 | lea sym2_R, [probs + 2 * PMULT] | ||
846 | |||
847 | jmp direct_norm | ||
848 | ; lea t1, [sym_R + (1 SHL kNumAlignBits)] | ||
849 | ; cmp range, kTopValue | ||
850 | ; jb direct_norm | ||
851 | |||
852 | ; ---------- DIRECT DISTANCE ---------- | ||
853 | MY_ALIGN_32 | ||
854 | direct_loop: | ||
855 | shr range, 1 | ||
856 | mov t0, cod | ||
857 | sub cod, range | ||
858 | cmovs cod, t0 | ||
859 | cmovns sym, t1 | ||
860 | |||
861 | comment ~ | ||
862 | sub cod, range | ||
863 | mov x2, cod | ||
864 | sar x2, 31 | ||
865 | lea sym, dword ptr [r2 + sym_R * 2 + 1] | ||
866 | and x2, range | ||
867 | add cod, x2 | ||
868 | ~ | ||
869 | dec x1 | ||
870 | je direct_end | ||
871 | |||
872 | add sym, sym | ||
873 | direct_norm: | ||
874 | lea t1, [sym_R + (1 SHL kNumAlignBits)] | ||
875 | cmp range, kTopValue | ||
876 | jae near ptr direct_loop | ||
877 | ; we align for 32 here with "near ptr" command above | ||
878 | NORM_2 | ||
879 | jmp direct_loop | ||
880 | |||
881 | MY_ALIGN_32 | ||
882 | direct_end: | ||
883 | ; prob = + kAlign; | ||
884 | ; distance <<= kNumAlignBits; | ||
885 | REV_0 x2, x1 | ||
886 | REV_1 x1, x2, 2 | ||
887 | REV_1 x2, x1, 4 | ||
888 | REV_2 x1, 8 | ||
889 | |||
890 | decode_dist_end: | ||
891 | |||
892 | ; if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize)) | ||
893 | |||
894 | mov t1, LOC rep0 | ||
895 | mov x1, LOC rep1 | ||
896 | mov x2, LOC rep2 | ||
897 | |||
898 | mov t0, LOC checkDicSize | ||
899 | test t0, t0 | ||
900 | cmove t0, processedPos | ||
901 | cmp sym, t0 | ||
902 | jae end_of_payload | ||
903 | ; jmp end_of_payload ; for debug | ||
904 | |||
905 | ; rep3 = rep2; | ||
906 | ; rep2 = rep1; | ||
907 | ; rep1 = rep0; | ||
908 | ; rep0 = distance + 1; | ||
909 | |||
910 | inc sym | ||
911 | mov LOC rep0, sym | ||
912 | ; mov sym, LOC remainLen | ||
913 | mov sym, len_temp | ||
914 | mov LOC rep1, t1 | ||
915 | mov LOC rep2, x1 | ||
916 | mov LOC rep3, x2 | ||
917 | |||
918 | ; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; | ||
919 | cmp state, (kNumStates + kNumLitStates) * PMULT | ||
920 | mov state, kNumLitStates * PMULT | ||
921 | mov t0, (kNumLitStates + 3) * PMULT | ||
922 | cmovae state, t0 | ||
923 | |||
924 | |||
925 | ; ---------- COPY MATCH ---------- | ||
926 | copy_match: | ||
927 | |||
928 | ; len += kMatchMinLen; | ||
929 | ; add sym, kMatchMinLen | ||
930 | |||
931 | ; if ((rem = limit - dicPos) == 0) | ||
932 | ; { | ||
933 | ; p->dicPos = dicPos; | ||
934 | ; return SZ_ERROR_DATA; | ||
935 | ; } | ||
936 | mov cnt_R, LOC limit | ||
937 | sub cnt_R, dicPos | ||
938 | jz fin_dicPos_LIMIT | ||
939 | |||
940 | ; curLen = ((rem < len) ? (unsigned)rem : len); | ||
941 | cmp cnt_R, sym_R | ||
942 | ; cmovae cnt_R, sym_R ; 64-bit | ||
943 | cmovae cnt, sym ; 32-bit | ||
944 | |||
945 | mov dic, LOC dic_Spec | ||
946 | mov x1, LOC rep0 | ||
947 | |||
948 | mov t0_R, dicPos | ||
949 | add dicPos, cnt_R | ||
950 | ; processedPos += curLen; | ||
951 | add processedPos, cnt | ||
952 | ; len -= curLen; | ||
953 | sub sym, cnt | ||
954 | mov LOC remainLen, sym | ||
955 | |||
956 | sub t0_R, dic | ||
957 | |||
958 | ; pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0); | ||
959 | sub t0_R, r1 | ||
960 | jae @f | ||
961 | |||
962 | mov r1, LOC dicBufSize | ||
963 | add t0_R, r1 | ||
964 | sub r1, t0_R | ||
965 | cmp cnt_R, r1 | ||
966 | ja copy_match_cross | ||
967 | @@: | ||
968 | ; if (curLen <= dicBufSize - pos) | ||
969 | |||
970 | ; ---------- COPY MATCH FAST ---------- | ||
971 | ; Byte *dest = dic + dicPos; | ||
972 | ; mov r1, dic | ||
973 | ; ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; | ||
974 | ; sub t0_R, dicPos | ||
975 | ; dicPos += curLen; | ||
976 | |||
977 | ; const Byte *lim = dest + curLen; | ||
978 | add t0_R, dic | ||
979 | movzx sym, byte ptr[t0_R] | ||
980 | add t0_R, cnt_R | ||
981 | neg cnt_R | ||
982 | ; lea r1, [dicPos - 1] | ||
983 | copy_common: | ||
984 | dec dicPos | ||
985 | ; cmp LOC rep0, 1 | ||
986 | ; je rep0Label | ||
987 | |||
988 | ; t0_R - src_lim | ||
989 | ; r1 - dest_lim - 1 | ||
990 | ; cnt_R - (-cnt) | ||
991 | |||
992 | IsMatchBranch_Pre | ||
993 | inc cnt_R | ||
994 | jz copy_end | ||
995 | MY_ALIGN_16 | ||
996 | @@: | ||
997 | mov byte ptr[cnt_R * 1 + dicPos], sym_L | ||
998 | movzx sym, byte ptr[cnt_R * 1 + t0_R] | ||
999 | inc cnt_R | ||
1000 | jnz @b | ||
1001 | |||
1002 | copy_end: | ||
1003 | lz_end_match: | ||
1004 | mov byte ptr[dicPos], sym_L | ||
1005 | inc dicPos | ||
1006 | |||
1007 | ; IsMatchBranch_Pre | ||
1008 | CheckLimits | ||
1009 | lz_end: | ||
1010 | IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label | ||
1011 | |||
1012 | |||
1013 | |||
1014 | ; ---------- LITERAL MATCHED ---------- | ||
1015 | |||
1016 | LIT_PROBS LOC lpMask | ||
1017 | |||
1018 | ; matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; | ||
1019 | mov x1, LOC rep0 | ||
1020 | ; mov dic, LOC dic_Spec | ||
1021 | mov LOC dicPos_Spec, dicPos | ||
1022 | |||
1023 | ; state -= (state < 10) ? 3 : 6; | ||
1024 | lea t0, [state_R - 6 * PMULT] | ||
1025 | sub state, 3 * PMULT | ||
1026 | cmp state, 7 * PMULT | ||
1027 | cmovae state, t0 | ||
1028 | |||
1029 | sub dicPos, dic | ||
1030 | sub dicPos, r1 | ||
1031 | jae @f | ||
1032 | add dicPos, LOC dicBufSize | ||
1033 | @@: | ||
1034 | comment ~ | ||
1035 | xor t0, t0 | ||
1036 | sub dicPos, r1 | ||
1037 | cmovb t0_R, LOC dicBufSize | ||
1038 | ~ | ||
1039 | |||
1040 | movzx match, byte ptr[dic + dicPos * 1] | ||
1041 | |||
1042 | ifdef _LZMA_SIZE_OPT | ||
1043 | |||
1044 | mov offs, 256 * PMULT | ||
1045 | shl match, (PSHIFT + 1) | ||
1046 | mov bit, match | ||
1047 | mov sym, 1 | ||
1048 | MY_ALIGN_16 | ||
1049 | litm_loop: | ||
1050 | LITM | ||
1051 | cmp sym, 256 | ||
1052 | jb litm_loop | ||
1053 | sub sym, 256 | ||
1054 | |||
1055 | else | ||
1056 | |||
1057 | LITM_0 | ||
1058 | LITM | ||
1059 | LITM | ||
1060 | LITM | ||
1061 | LITM | ||
1062 | LITM | ||
1063 | LITM | ||
1064 | LITM_2 | ||
1065 | |||
1066 | endif | ||
1067 | |||
1068 | mov probs, LOC probs_Spec | ||
1069 | IsMatchBranch_Pre | ||
1070 | ; mov dic, LOC dic_Spec | ||
1071 | mov dicPos, LOC dicPos_Spec | ||
1072 | mov byte ptr[dicPos], sym_L | ||
1073 | inc dicPos | ||
1074 | |||
1075 | CheckLimits | ||
1076 | lit_matched_end: | ||
1077 | IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label | ||
1078 | ; IsMatchBranch | ||
1079 | mov lpMask_reg, LOC lpMask | ||
1080 | sub state, 3 * PMULT | ||
1081 | jmp lit_start_2 | ||
1082 | |||
1083 | |||
1084 | |||
1085 | ; ---------- REP 0 LITERAL ---------- | ||
1086 | MY_ALIGN_32 | ||
; IsRep0Short_label: reached from IsRep_label when the IsRep0Long test takes its "0" branch.
; Loads into sym the byte located rep0 positions behind dicPos (wrapping by dicBufSize)
; and leaves through lz_end_match. UPDATE_0 and IsMatchBranch_Pre are macros defined
; outside this excerpt.
1087 | IsRep0Short_label: | ||
1088 | UPDATE_0 probs_state_R, pbPos_R, IsRep0Long | ||
1089 | |||
1090 | ; C equivalent: dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)]; | ||
; In this file dicPos is used as an address (it is dereferenced directly elsewhere),
; so it is first converted to an offset: t0_R = dicPos - dic.
1091 | mov dic, LOC dic_Spec | ||
1092 | mov t0_R, dicPos | ||
1093 | mov probBranch, LOC rep0 | ||
1094 | sub t0_R, dic | ||
1095 | |||
; Undo the "add probs, RepLenCoder * PMULT" performed in IsRep_label before jumping here.
1096 | sub probs, RepLenCoder * PMULT | ||
1097 | |||
1098 | ; state = state < kNumLitStates ? 9 : 11; (IsRep_label already set state to 8 or 11, scaled by PMULT; OR-ing in 1 * PMULT turns 8 into 9 and leaves 11 unchanged) | ||
1099 | or state, 1 * PMULT | ||
1100 | |||
1101 | ; The caller doesn't allow the (dicPos >= limit) case for REP_SHORT, | ||
1102 | ; so the following (dicPos == limit) check is not needed here: | ||
1103 | ; cmp dicPos, LOC limit | ||
1104 | ; jae fin_dicPos_LIMIT_REP_SHORT | ||
1105 | |||
1106 | inc processedPos | ||
1107 | |||
1108 | IsMatchBranch_Pre | ||
1109 | |||
1110 | ; Branch-free alternative (disabled): xor sym, sym | ||
1111 | ; sub t0_R, probBranch_R | ||
1112 | ; cmovb sym_R, LOC dicBufSize | ||
1113 | ; add t0_R, sym_R | ||
; t0_R = offset - rep0; if the subtraction borrowed (offset < rep0), wrap by adding dicBufSize.
; probBranch_R is presumably the full-width alias of probBranch, which holds rep0 - TODO confirm.
1114 | sub t0_R, probBranch_R | ||
1115 | jae @f | ||
1116 | add t0_R, LOC dicBufSize | ||
1117 | @@: | ||
; sym = dic[t0_R], zero-extended; lz_end_match (outside this excerpt) takes over from here.
1118 | movzx sym, byte ptr[dic + t0_R * 1] | ||
1119 | jmp lz_end_match | ||
1120 | |||
1121 | |||
1122 | MY_ALIGN_32 | ||
; IsRep_label: start of the repeated-match path. Picks the new state, biases probs by
; RepLenCoder, then dispatches on the IsRepG0 and IsRep0Long tests.
; UPDATE_1, IF_BIT_1 and IF_BIT_0_NOUP are macros defined outside this excerpt; by their
; names they presumably branch to the given label when the decoded bit is 1 / 0 - TODO confirm.
1123 | IsRep_label: | ||
1124 | UPDATE_1 probs_state_R, 0, IsRep | ||
1125 | |||
1126 | ; The (checkDicSize == 0 && processedPos == 0) case is already rejected in LzmaDec.c via kBadRepCode, | ||
1127 | ; so it is not checked again here. | ||
1128 | |||
1129 | ; mov t0, processedPos | ||
1130 | ; or t0, LOC checkDicSize | ||
1131 | ; jz fin_ERROR_2 | ||
1132 | |||
1133 | ; state = state < kNumLitStates ? 8 : 11; | ||
; The two mov instructions do not alter flags, so cmovae still acts on the cmp result.
1134 | cmp state, kNumLitStates * PMULT | ||
1135 | mov state, 8 * PMULT | ||
1136 | mov probBranch, 11 * PMULT | ||
1137 | cmovae state, probBranch | ||
1138 | |||
1139 | ; prob = probs + RepLenCoder; (IsRep0Short_label subtracts this bias again on its path) | ||
1140 | add probs, RepLenCoder * PMULT | ||
1141 | |||
; Dispatch: IsRepG0 -> IsRepG0_label; otherwise IsRep0Long selects between
; IsRep0Short_label and the fall-through to len_decode.
1142 | IF_BIT_1 probs_state_R, 0, IsRepG0, IsRepG0_label | ||
1143 | IF_BIT_0_NOUP probs_state_R, pbPos_R, IsRep0Long, IsRep0Short_label | ||
1144 | UPDATE_1 probs_state_R, pbPos_R, IsRep0Long | ||
1145 | jmp len_decode | ||
1146 | |||
1147 | MY_ALIGN_32 | ||
; IsRepG0_label / IsRepG1_label / IsRepG2_label: choose rep1, rep2 or rep3 as the new rep0
; and shift the older distances down one slot. Every path ends at len_decode.
; The description assumes the IF_BIT_1 / UPDATE_1 macros (defined outside this excerpt)
; leave dist and dist2 untouched - TODO confirm.
1148 | IsRepG0_label: | ||
1149 | UPDATE_1 probs_state_R, 0, IsRepG0 | ||
; dist2 = old rep0, dist = old rep1; rep1 = old rep0.
1150 | mov dist2, LOC rep0 | ||
1151 | mov dist, LOC rep1 | ||
1152 | mov LOC rep1, dist2 | ||
1153 | |||
1154 | IF_BIT_1 probs_state_R, 0, IsRepG1, IsRepG1_label | ||
; Fall-through: rep0 = old rep1.
1155 | mov LOC rep0, dist | ||
1156 | jmp len_decode | ||
1157 | |||
1158 | ; MY_ALIGN_32 | ||
1159 | IsRepG1_label: | ||
1160 | UPDATE_1 probs_state_R, 0, IsRepG1 | ||
; dist2 = old rep2; rep2 = old rep1 (still held in dist).
1161 | mov dist2, LOC rep2 | ||
1162 | mov LOC rep2, dist | ||
1163 | |||
1164 | IF_BIT_1 probs_state_R, 0, IsRepG2, IsRepG2_label | ||
; Fall-through: rep0 = old rep2.
1165 | mov LOC rep0, dist2 | ||
1166 | jmp len_decode | ||
1167 | |||
1168 | ; MY_ALIGN_32 | ||
1169 | IsRepG2_label: | ||
1170 | UPDATE_1 probs_state_R, 0, IsRepG2 | ||
; dist = old rep3; rep3 = old rep2 (held in dist2); rep0 = old rep3.
1171 | mov dist, LOC rep3 | ||
1172 | mov LOC rep3, dist2 | ||
1173 | mov LOC rep0, dist | ||
1174 | jmp len_decode | ||
1175 | |||
1176 | |||
1177 | |||
1178 | ; ---------- SPEC SHORT DISTANCE ---------- | ||
1179 | |||
1180 | MY_ALIGN_32 | ||
; short_dist: decodes the extra bits of a short distance through the SpecPos probability
; area, one REV_1_VAR step per bit, then rejoins decode_dist_end.
; REV_1_VAR is a macro defined outside this excerpt; the meaning of x1 on entry is set by
; the caller (also outside this excerpt).
1181 | short_dist: | ||
; x1 -= 33; when the result is zero or the subtraction borrowed there is nothing to decode.
1182 | sub x1, 32 + 1 | ||
1183 | jbe decode_dist_end | ||
; sym = (sym | 2) << x1, then sym_R is turned into a pointer into the probs array:
; probs + (sym + SpecPos + 1) * PMULT.
1184 | or sym, 2 | ||
1185 | shl sym, x1_L | ||
1186 | lea sym_R, [probs + sym_R * PMULT + SpecPos * PMULT + 1 * PMULT] | ||
1187 | mov sym2, PMULT ; step | ||
1188 | MY_ALIGN_32 | ||
; Loop runs x1 times (x1 is the remaining bit counter).
1189 | spec_loop: | ||
1190 | REV_1_VAR x2 | ||
1191 | dec x1 | ||
1192 | jnz spec_loop | ||
1193 | |||
; Convert the pointer in sym_R back to a plain value: remove the final step (sym2), the
; SpecPos bias and the probs base, then undo the PMULT scaling.
1194 | mov probs, LOC probs_Spec | ||
1195 | sub sym, sym2 | ||
1196 | sub sym, SpecPos * PMULT | ||
1197 | sub sym_R, probs | ||
1198 | shr sym, PSHIFT | ||
1199 | |||
1200 | jmp decode_dist_end | ||
1201 | |||
1202 | |||
1203 | ; ---------- COPY MATCH CROSS ---------- | ||
; copy_match_cross: first part of a match copy whose source range runs up to the end of
; the dictionary buffer. Copies bytes from dic[srcPos .. dicBufSize) and then hands over
; to copy_common with the source restarted at dic[0].
1204 | copy_match_cross: | ||
1205 | ; t0_R - src pos | ||
1206 | ; r1 - len to dicBufSize (NOTE(review): r1 is reloaded with dicBufSize itself below, so this entry looks stale - confirm) | ||
1207 | ; cnt_R - total copy len | ||
1208 | |||
1209 | mov t1_R, t0_R ; srcPos | ||
1210 | mov t0_R, dic | ||
1211 | mov r1, LOC dicBufSize ; r1 = dicBufSize, the loop end bound for t1_R | ||
; cnt_R becomes a negative index that counts up; it is combined with dicPos for the
; destination address.
1212 | neg cnt_R | ||
1213 | @@: | ||
; Copy one byte: dicPos[cnt_R] = dic[t1_R]; repeat until t1_R reaches dicBufSize.
1214 | movzx sym, byte ptr[t1_R * 1 + t0_R] | ||
1215 | inc t1_R | ||
1216 | mov byte ptr[cnt_R * 1 + dicPos], sym_L | ||
1217 | inc cnt_R | ||
1218 | cmp t1_R, r1 | ||
1219 | jne @b | ||
1220 | |||
; Source wrapped: preload sym with dic[0] and set t0_R = dic - cnt_R before continuing in
; copy_common (outside this excerpt).
1221 | movzx sym, byte ptr[t0_R] | ||
1222 | sub t0_R, cnt_R | ||
1223 | jmp copy_common | ||
1224 | |||
1225 | |||
1226 | |||
1227 | |||
1228 | ; fin_dicPos_LIMIT_REP_SHORT: | ||
1229 | ; mov sym, 1 | ||
1230 | |||
; fin_dicPos_LIMIT: stores sym into the remainLen local and exits through fin_OK,
; which clears sym before the common epilogue.
1231 | fin_dicPos_LIMIT: | ||
1232 | mov LOC remainLen, sym | ||
1233 | jmp fin_OK | ||
1234 | ; For a stricter mode we could stop decoding with an error instead: | ||
1235 | ; mov sym, 1 | ||
1236 | ; jmp fin | ||
1237 | |||
1238 | |||
; fin_ERROR_MATCH_DIST: error exit for a bad match distance. Records the decoder state
; and leaves through fin with sym = 1 (fin copies sym into x0).
1239 | fin_ERROR_MATCH_DIST: | ||
1240 | |||
1241 | ; C equivalent: rep3 = rep2; | ||
1242 | ; rep2 = rep1; | ||
1243 | ; rep1 = rep0; | ||
1244 | ; rep0 = distance + 1; | ||
1245 | |||
; remainLen = len_temp + kMatchSpecLen_Error_Data.
1246 | add len_temp, kMatchSpecLen_Error_Data | ||
1247 | mov LOC remainLen, len_temp | ||
1248 | |||
; Store the four rep values from registers; sym, t1, x1 and x2 are presumably loaded with
; the values from the C snippet above by the code that jumps here - TODO confirm.
1249 | mov LOC rep0, sym | ||
1250 | mov LOC rep1, t1 | ||
1251 | mov LOC rep2, x1 | ||
1252 | mov LOC rep3, x2 | ||
1253 | |||
1254 | ; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; | ||
; The mov instructions keep the flags from cmp intact for cmovae.
1255 | cmp state, (kNumStates + kNumLitStates) * PMULT | ||
1256 | mov state, kNumLitStates * PMULT | ||
1257 | mov t0, (kNumLitStates + 3) * PMULT | ||
1258 | cmovae state, t0 | ||
1259 | |||
1260 | ; jmp fin_OK | ||
; Result code 1, in contrast to fin_OK which zeroes sym.
1261 | mov sym, 1 | ||
1262 | jmp fin | ||
1263 | |||
; end_of_payload / fin_OK / fin: the exit paths of the procedure.
;   end_of_payload - if sym + 1 is non-zero, go to fin_ERROR_MATCH_DIST;
;                    otherwise set remainLen = kMatchSpecLenStart and fall into fin_OK.
;   fin_OK         - result code 0.
;   fin            - common epilogue: writes the working registers back and returns.
; NORM, GLOB, LOC, RESTORE_VAR, MY_POP_PRESERVED_ABI_REGS and MY_ENDP are macros defined
; outside this excerpt.
1264 | end_of_payload: | ||
; inc wraps to zero only when sym held all ones before the increment.
1265 | inc sym | ||
1266 | jnz fin_ERROR_MATCH_DIST | ||
1267 | |||
1268 | mov LOC remainLen, kMatchSpecLenStart | ||
1269 | sub state, kNumStates * PMULT | ||
1270 | |||
1271 | fin_OK: | ||
1272 | xor sym, sym | ||
1273 | |||
1274 | fin: | ||
1275 | NORM | ||
1276 | |||
; r1 = lzmaPtr; the GLOB stores below presumably address fields relative to it - TODO confirm.
1277 | mov r1, LOC lzmaPtr | ||
1278 | |||
; dicPos is held as an address in this routine; convert it back to an offset from dic_Spec
; before storing it.
1279 | sub dicPos, LOC dic_Spec | ||
1280 | mov GLOB dicPos_Spec, dicPos | ||
1281 | mov GLOB buf_Spec, buf | ||
1282 | mov GLOB range_Spec, range | ||
1283 | mov GLOB code_Spec, cod | ||
; state is kept scaled by PMULT (= 1 SHL PSHIFT) in this routine; unscale before storing.
1284 | shr state, PSHIFT | ||
1285 | mov GLOB state_Spec, state | ||
1286 | mov GLOB processedPos_Spec, processedPos | ||
1287 | |||
1288 | RESTORE_VAR(remainLen) | ||
1289 | RESTORE_VAR(rep0) | ||
1290 | RESTORE_VAR(rep1) | ||
1291 | RESTORE_VAR(rep2) | ||
1292 | RESTORE_VAR(rep3) | ||
1293 | |||
; x0 receives the result code (0 from fin_OK, 1 from the error path); presumably x0 is the
; return-value register - TODO confirm.
1294 | mov x0, sym | ||
1295 | |||
; Restore the stack pointer from the Old_RSP local.
1296 | mov RSP, LOC Old_RSP | ||
1297 | |||
1298 | MY_POP_PRESERVED_ABI_REGS | ||
1299 | MY_ENDP | ||
1300 | |||
1301 | _TEXT$LZMADECOPT ENDS | ||
1302 | |||
1303 | end | ||