aboutsummaryrefslogtreecommitdiff
path: root/Asm/x86/LzFindOpt.asm
diff options
context:
space:
mode:
Diffstat (limited to 'Asm/x86/LzFindOpt.asm')
-rw-r--r--Asm/x86/LzFindOpt.asm513
1 files changed, 513 insertions, 0 deletions
diff --git a/Asm/x86/LzFindOpt.asm b/Asm/x86/LzFindOpt.asm
new file mode 100644
index 0000000..42e10bd
--- /dev/null
+++ b/Asm/x86/LzFindOpt.asm
@@ -0,0 +1,513 @@
1; LzFindOpt.asm -- ASM version of GetMatchesSpecN_2() function
2; 2021-07-21: Igor Pavlov : Public domain
3;
4
5ifndef x64
6; x64=1
7; .err <x64_IS_REQUIRED>
8endif
9
10include 7zAsm.asm
11
12MY_ASM_START
13
14_TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE'
15
16MY_ALIGN macro num:req
17 align num
18endm
19
20MY_ALIGN_32 macro
21 MY_ALIGN 32
22endm
23
24MY_ALIGN_64 macro
25 MY_ALIGN 64
26endm
27
28
29t0_L equ x0_L
30t0_x equ x0
31t0 equ r0
32t1_x equ x3
33t1 equ r3
34
35cp_x equ t1_x
36cp_r equ t1
37m equ x5
38m_r equ r5
39len_x equ x6
40len equ r6
41diff_x equ x7
42diff equ r7
43len0 equ r10
44len1_x equ x11
45len1 equ r11
46maxLen_x equ x12
47maxLen equ r12
48d equ r13
49ptr0 equ r14
50ptr1 equ r15
51
52d_lim equ m_r
53cycSize equ len_x
54hash_lim equ len0
55delta1_x equ len1_x
56delta1_r equ len1
57delta_x equ maxLen_x
58delta_r equ maxLen
59hash equ ptr0
60src equ ptr1
61
62
63
64if (IS_LINUX gt 0)
65
66; r1 r2 r8 r9 : win32
67; r7 r6 r2 r1 r8 r9 : linux
68
69lenLimit equ r8
70lenLimit_x equ x8
71; pos_r equ r2
72pos equ x2
73cur equ r1
74son equ r9
75
76else
77
78lenLimit equ REG_ABI_PARAM_2
79lenLimit_x equ REG_ABI_PARAM_2_x
80pos equ REG_ABI_PARAM_1_x
81cur equ REG_ABI_PARAM_0
82son equ REG_ABI_PARAM_3
83
84endif
85
86
87if (IS_LINUX gt 0)
88 maxLen_OFFS equ (REG_SIZE * (6 + 1))
89else
90 cutValue_OFFS equ (REG_SIZE * (8 + 1 + 4))
91 d_OFFS equ (REG_SIZE + cutValue_OFFS)
92 maxLen_OFFS equ (REG_SIZE + d_OFFS)
93endif
94 hash_OFFS equ (REG_SIZE + maxLen_OFFS)
95 limit_OFFS equ (REG_SIZE + hash_OFFS)
96 size_OFFS equ (REG_SIZE + limit_OFFS)
97 cycPos_OFFS equ (REG_SIZE + size_OFFS)
98 cycSize_OFFS equ (REG_SIZE + cycPos_OFFS)
99 posRes_OFFS equ (REG_SIZE + cycSize_OFFS)
100
101if (IS_LINUX gt 0)
102else
103 cutValue_PAR equ [r0 + cutValue_OFFS]
104 d_PAR equ [r0 + d_OFFS]
105endif
106 maxLen_PAR equ [r0 + maxLen_OFFS]
107 hash_PAR equ [r0 + hash_OFFS]
108 limit_PAR equ [r0 + limit_OFFS]
109 size_PAR equ [r0 + size_OFFS]
110 cycPos_PAR equ [r0 + cycPos_OFFS]
111 cycSize_PAR equ [r0 + cycSize_OFFS]
112 posRes_PAR equ [r0 + posRes_OFFS]
113
114
115 cutValue_VAR equ DWORD PTR [r4 + 8 * 0]
116 cutValueCur_VAR equ DWORD PTR [r4 + 8 * 0 + 4]
117 cycPos_VAR equ DWORD PTR [r4 + 8 * 1 + 0]
118 cycSize_VAR equ DWORD PTR [r4 + 8 * 1 + 4]
119 hash_VAR equ QWORD PTR [r4 + 8 * 2]
120 limit_VAR equ QWORD PTR [r4 + 8 * 3]
121 size_VAR equ QWORD PTR [r4 + 8 * 4]
122 distances equ QWORD PTR [r4 + 8 * 5]
123 maxLen_VAR equ QWORD PTR [r4 + 8 * 6]
124
125 Old_RSP equ QWORD PTR [r4 + 8 * 7]
126 LOCAL_SIZE equ 8 * 8
127
128COPY_VAR_32 macro dest_var, src_var
129 mov x3, src_var
130 mov dest_var, x3
131endm
132
133COPY_VAR_64 macro dest_var, src_var
134 mov r3, src_var
135 mov dest_var, r3
136endm
137
138
139; MY_ALIGN_64
140MY_PROC GetMatchesSpecN_2, 13
141MY_PUSH_PRESERVED_ABI_REGS
142 mov r0, RSP
143 lea r3, [r0 - LOCAL_SIZE]
144 and r3, -64
145 mov RSP, r3
146 mov Old_RSP, r0
147
148if (IS_LINUX gt 0)
149 mov d, REG_ABI_PARAM_5 ; r13 = r9
150 mov cutValue_VAR, REG_ABI_PARAM_4_x ; = r8
151 mov son, REG_ABI_PARAM_3 ; r9 = r1
152 mov r8, REG_ABI_PARAM_2 ; r8 = r2
153 mov pos, REG_ABI_PARAM_1_x ; r2 = x6
154 mov r1, REG_ABI_PARAM_0 ; r1 = r7
155else
156 COPY_VAR_32 cutValue_VAR, cutValue_PAR
157 mov d, d_PAR
158endif
159
160 COPY_VAR_64 limit_VAR, limit_PAR
161
162 mov hash_lim, size_PAR
163 mov size_VAR, hash_lim
164
165 mov cp_x, cycPos_PAR
166 mov hash, hash_PAR
167
168 mov cycSize, cycSize_PAR
169 mov cycSize_VAR, cycSize
170
171 ; we want cur in (rcx). So we change the cur and lenLimit variables
172 sub lenLimit, cur
173 neg lenLimit_x
174 inc lenLimit_x
175
176 mov t0_x, maxLen_PAR
177 sub t0, lenLimit
178 mov maxLen_VAR, t0
179
180 jmp main_loop
181
182MY_ALIGN_64
183fill_empty:
184 ; ptr0 = *ptr1 = kEmptyHashValue;
185 mov QWORD PTR [ptr1], 0
186 inc pos
187 inc cp_x
188 mov DWORD PTR [d - 4], 0
189 cmp d, limit_VAR
190 jae fin
191 cmp hash, hash_lim
192 je fin
193
194; MY_ALIGN_64
195main_loop:
196 ; UInt32 delta = *hash++;
197 mov diff_x, [hash] ; delta
198 add hash, 4
199 ; mov cycPos_VAR, cp_x
200
201 inc cur
202 add d, 4
203 mov m, pos
204 sub m, diff_x; ; matchPos
205
206 ; CLzRef *ptr1 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2;
207 lea ptr1, [son + 8 * cp_r]
208 ; mov cycSize, cycSize_VAR
209 cmp pos, cycSize
210 jb directMode ; if (pos < cycSize_VAR)
211
212 ; CYC MODE
213
214 cmp diff_x, cycSize
215 jae fill_empty ; if (delta >= cycSize_VAR)
216
217 xor t0_x, t0_x
218 mov cycPos_VAR, cp_x
219 sub cp_x, diff_x
220 ; jae prepare_for_tree_loop
221 ; add cp_x, cycSize
222 cmovb t0_x, cycSize
223 add cp_x, t0_x ; cp_x += (cycPos < delta ? cycSize : 0)
224 jmp prepare_for_tree_loop
225
226
227directMode:
228 cmp diff_x, pos
229 je fill_empty ; if (delta == pos)
230 jae fin_error ; if (delta >= pos)
231
232 mov cycPos_VAR, cp_x
233 mov cp_x, m
234
235prepare_for_tree_loop:
236 mov len0, lenLimit
237 mov hash_VAR, hash
238 ; CLzRef *ptr0 = son + ((size_t)(pos) << 1) - CYC_TO_POS_OFFSET * 2 + 1;
239 lea ptr0, [ptr1 + 4]
240 ; UInt32 *_distances = ++d;
241 mov distances, d
242
243 neg len0
244 mov len1, len0
245
246 mov t0_x, cutValue_VAR
247 mov maxLen, maxLen_VAR
248 mov cutValueCur_VAR, t0_x
249
250MY_ALIGN_32
251tree_loop:
252 neg diff
253 mov len, len0
254 cmp len1, len0
255 cmovb len, len1 ; len = (len1 < len0 ? len1 : len0);
256 add diff, cur
257
258 mov t0_x, [son + cp_r * 8] ; prefetch
259 movzx t0_x, BYTE PTR [diff + 1 * len]
260 lea cp_r, [son + cp_r * 8]
261 cmp [cur + 1 * len], t0_L
262 je matched_1
263
264 jb left_0
265
266 mov [ptr1], m
267 mov m, [cp_r + 4]
268 lea ptr1, [cp_r + 4]
269 sub diff, cur ; FIX32
270 jmp next_node
271
272MY_ALIGN_32
273left_0:
274 mov [ptr0], m
275 mov m, [cp_r]
276 mov ptr0, cp_r
277 sub diff, cur ; FIX32
278 ; jmp next_node
279
280; ------------ NEXT NODE ------------
281; MY_ALIGN_32
282next_node:
283 mov cycSize, cycSize_VAR
284 dec cutValueCur_VAR
285 je finish_tree
286
287 add diff_x, pos ; prev_match = pos + diff
288 cmp m, diff_x
289 jae fin_error ; if (new_match >= prev_match)
290
291 mov diff_x, pos
292 sub diff_x, m ; delta = pos - new_match
293 cmp pos, cycSize
294 jae cyc_mode_2 ; if (pos >= cycSize)
295
296 mov cp_x, m
297 test m, m
298 jne tree_loop ; if (m != 0)
299
300finish_tree:
301 ; ptr0 = *ptr1 = kEmptyHashValue;
302 mov DWORD PTR [ptr0], 0
303 mov DWORD PTR [ptr1], 0
304
305 inc pos
306
307 ; _distances[-1] = (UInt32)(d - _distances);
308 mov t0, distances
309 mov t1, d
310 sub t1, t0
311 shr t1_x, 2
312 mov [t0 - 4], t1_x
313
314 cmp d, limit_VAR
315 jae fin ; if (d >= limit)
316
317 mov cp_x, cycPos_VAR
318 mov hash, hash_VAR
319 mov hash_lim, size_VAR
320 inc cp_x
321 cmp hash, hash_lim
322 jne main_loop ; if (hash != size)
323 jmp fin
324
325
326MY_ALIGN_32
327cyc_mode_2:
328 cmp diff_x, cycSize
329 jae finish_tree ; if (delta >= cycSize)
330
331 mov cp_x, cycPos_VAR
332 xor t0_x, t0_x
333 sub cp_x, diff_x ; cp_x = cycPos - delta
334 cmovb t0_x, cycSize
335 add cp_x, t0_x ; cp_x += (cycPos < delta ? cycSize : 0)
336 jmp tree_loop
337
338
339MY_ALIGN_32
340matched_1:
341
342 inc len
343 ; cmp len_x, lenLimit_x
344 je short lenLimit_reach
345 movzx t0_x, BYTE PTR [diff + 1 * len]
346 cmp [cur + 1 * len], t0_L
347 jne mismatch
348
349
350MY_ALIGN_32
351match_loop:
352 ; while (++len != lenLimit) (len[diff] != len[0]) ;
353
354 inc len
355 ; cmp len_x, lenLimit_x
356 je short lenLimit_reach
357 movzx t0_x, BYTE PTR [diff + 1 * len]
358 cmp BYTE PTR [cur + 1 * len], t0_L
359 je match_loop
360
361mismatch:
362 jb left_2
363
364 mov [ptr1], m
365 mov m, [cp_r + 4]
366 lea ptr1, [cp_r + 4]
367 mov len1, len
368
369 jmp max_update
370
371MY_ALIGN_32
372left_2:
373 mov [ptr0], m
374 mov m, [cp_r]
375 mov ptr0, cp_r
376 mov len0, len
377
378max_update:
379 sub diff, cur ; restore diff
380
381 cmp maxLen, len
382 jae next_node
383
384 mov maxLen, len
385 add len, lenLimit
386 mov [d], len_x
387 mov t0_x, diff_x
388 not t0_x
389 mov [d + 4], t0_x
390 add d, 8
391
392 jmp next_node
393
394
395
396MY_ALIGN_32
397lenLimit_reach:
398
399 mov delta_r, cur
400 sub delta_r, diff
401 lea delta1_r, [delta_r - 1]
402
403 mov t0_x, [cp_r]
404 mov [ptr1], t0_x
405 mov t0_x, [cp_r + 4]
406 mov [ptr0], t0_x
407
408 mov [d], lenLimit_x
409 mov [d + 4], delta1_x
410 add d, 8
411
412 ; _distances[-1] = (UInt32)(d - _distances);
413 mov t0, distances
414 mov t1, d
415 sub t1, t0
416 shr t1_x, 2
417 mov [t0 - 4], t1_x
418
419 mov hash, hash_VAR
420 mov hash_lim, size_VAR
421
422 inc pos
423 mov cp_x, cycPos_VAR
424 inc cp_x
425
426 mov d_lim, limit_VAR
427 mov cycSize, cycSize_VAR
428 ; if (hash == size || *hash != delta || lenLimit[diff] != lenLimit[0] || d >= limit)
429 ; break;
430 cmp hash, hash_lim
431 je fin
432 cmp d, d_lim
433 jae fin
434 cmp delta_x, [hash]
435 jne main_loop
436 movzx t0_x, BYTE PTR [diff]
437 cmp [cur], t0_L
438 jne main_loop
439
440 ; jmp main_loop ; bypass for debug
441
442 mov cycPos_VAR, cp_x
443 shl len, 3 ; cycSize * 8
444 sub diff, cur ; restore diff
445 xor t0_x, t0_x
446 cmp cp_x, delta_x ; cmp (cycPos_VAR, delta)
447 lea cp_r, [son + 8 * cp_r] ; dest
448 lea src, [cp_r + 8 * diff]
449 cmovb t0, len ; t0 = (cycPos_VAR < delta ? cycSize * 8 : 0)
450 add src, t0
451 add len, son ; len = son + cycSize * 8
452
453
454MY_ALIGN_32
455long_loop:
456 add hash, 4
457
458 ; *(UInt64 *)(void *)ptr = ((const UInt64 *)(const void *)ptr)[diff];
459
460 mov t0, [src]
461 add src, 8
462 mov [cp_r], t0
463 add cp_r, 8
464 cmp src, len
465 cmove src, son ; if end of (son) buffer is reached, we wrap to begin
466
467 mov DWORD PTR [d], 2
468 mov [d + 4], lenLimit_x
469 mov [d + 8], delta1_x
470 add d, 12
471
472 inc cur
473
474 cmp hash, hash_lim
475 je long_footer
476 cmp delta_x, [hash]
477 jne long_footer
478 movzx t0_x, BYTE PTR [diff + 1 * cur]
479 cmp [cur], t0_L
480 jne long_footer
481 cmp d, d_lim
482 jb long_loop
483
484long_footer:
485 sub cp_r, son
486 shr cp_r, 3
487 add pos, cp_x
488 sub pos, cycPos_VAR
489 mov cycSize, cycSize_VAR
490
491 cmp d, d_lim
492 jae fin
493 cmp hash, hash_lim
494 jne main_loop
495 jmp fin
496
497
498
499fin_error:
500 xor d, d
501
502fin:
503 mov RSP, Old_RSP
504 mov t0, [r4 + posRes_OFFS]
505 mov [t0], pos
506 mov r0, d
507
508MY_POP_PRESERVED_ABI_REGS
509MY_ENDP
510
511_TEXT$LZFINDOPT ENDS
512
513end