diff options
author | Mark Adler <madler@alumni.caltech.edu> | 2011-09-09 23:24:43 -0700 |
---|---|---|
committer | Mark Adler <madler@alumni.caltech.edu> | 2011-09-09 23:24:43 -0700 |
commit | 6b8233bfe00e79134cb1b84fc49d4f750a797f79 (patch) | |
tree | ca2b03b0169568681dc3d9c823e9f0bc4417d6b5 /contrib/masmx86/inffas32.asm | |
parent | 0484693e1723bbab791c56f95597bd7dbe867d03 (diff) | |
download | zlib-1.2.2.3.tar.gz zlib-1.2.2.3.tar.bz2 zlib-1.2.2.3.zip |
zlib 1.2.2.3 (tag: v1.2.2.3)
Diffstat (limited to 'contrib/masmx86/inffas32.asm')
-rw-r--r-- | contrib/masmx86/inffas32.asm | 2119 |
1 files changed, 1083 insertions, 1036 deletions
diff --git a/contrib/masmx86/inffas32.asm b/contrib/masmx86/inffas32.asm index 531bcef..4a20512 100644 --- a/contrib/masmx86/inffas32.asm +++ b/contrib/masmx86/inffas32.asm | |||
@@ -1,1036 +1,1083 @@ | |||
1 | ; 75 "inffast.S" | 1 | ;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding |
2 | ;FILE "inffast.S" | 2 | ; * |
3 | 3 | ; * inffas32.asm is derived from inffas86.c, with translation of assembly code | |
4 | ;;;GLOBAL _inflate_fast | 4 | ; * |
5 | 5 | ; * Copyright (C) 1995-2003 Mark Adler | |
6 | ;;;SECTION .text | 6 | ; * For conditions of distribution and use, see copyright notice in zlib.h |
7 | 7 | ; * | |
8 | 8 | ; * Copyright (C) 2003 Chris Anderson <christop@charm.net> | |
9 | 9 | ; * Please use the copyright conditions above. | |
10 | .586p | 10 | ; * |
11 | .mmx | 11 | ; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from |
12 | 12 | ; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at | |
13 | name inflate_fast_x86 | 13 | ; * the moment. I have successfully compiled and tested this code with gcc2.96, |
14 | .MODEL FLAT | 14 | ; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S |
15 | 15 | ; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX | |
16 | _DATA segment | 16 | ; * enabled. I will attempt to merge the MMX code into this version. Newer |
17 | inflate_fast_use_mmx: | 17 | ; * versions of this and inffast.S can be found at |
18 | dd 1 | 18 | ; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/ |
19 | 19 | ; * | |
20 | 20 | ; * 2005 : modification by Gilles Vollant | |
21 | _TEXT segment | 21 | ; */ |
22 | PUBLIC _inflate_fast | 22 | ; For Visual C++ 4.x and higher and ML 6.x and higher |
23 | 23 | ; ml.exe is in directory \MASM611C of Win95 DDK | |
24 | ALIGN 4 | 24 | ; ml.exe is also distributed in http://www.masm32.com/masmdl.htm |
25 | _inflate_fast: | 25 | ; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/ |
26 | jmp inflate_fast_entry | 26 | ; |
27 | 27 | ; | |
28 | 28 | ; compile with command line option | |
29 | 29 | ; ml /coff /Zi /c /Flinffas32.lst inffas32.asm | |
30 | ALIGN 4 | 30 | |
31 | db 'Fast decoding Code from Chris Anderson' | 31 | ; if you define NO_GZIP (see inflate.h), compile with |
32 | db 0 | 32 | ; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm |
33 | 33 | ||
34 | ALIGN 4 | 34 | |
35 | invalid_literal_length_code_msg: | 35 | ; zlib1222sup is 0 for zlib 1.2.2.1 and lower |
36 | db 'invalid literal/length code' | 36 | ; zlib1222sup is 8 for zlib 1.2.2.2 and higher (with addition of dmax and head |
37 | db 0 | 37 | ; in inflate_state in inflate.h) |
38 | 38 | zlib1222sup equ 8 | |
39 | ALIGN 4 | 39 | |
40 | invalid_distance_code_msg: | 40 | |
41 | db 'invalid distance code' | 41 | IFDEF GUNZIP |
42 | db 0 | 42 | INFLATE_MODE_TYPE equ 11 |
43 | 43 | INFLATE_MODE_BAD equ 26 | |
44 | ALIGN 4 | 44 | ELSE |
45 | invalid_distance_too_far_msg: | 45 | IFNDEF NO_GUNZIP |
46 | db 'invalid distance too far back' | 46 | INFLATE_MODE_TYPE equ 11 |
47 | db 0 | 47 | INFLATE_MODE_BAD equ 26 |
48 | 48 | ELSE | |
49 | 49 | INFLATE_MODE_TYPE equ 3 | |
50 | ALIGN 4 | 50 | INFLATE_MODE_BAD equ 17 |
51 | inflate_fast_mask: | 51 | ENDIF |
52 | dd 0 | 52 | ENDIF |
53 | dd 1 | 53 | |
54 | dd 3 | 54 | |
55 | dd 7 | 55 | ; 75 "inffast.S" |
56 | dd 15 | 56 | ;FILE "inffast.S" |
57 | dd 31 | 57 | |
58 | dd 63 | 58 | ;;;GLOBAL _inflate_fast |
59 | dd 127 | 59 | |
60 | dd 255 | 60 | ;;;SECTION .text |
61 | dd 511 | 61 | |
62 | dd 1023 | 62 | |
63 | dd 2047 | 63 | |
64 | dd 4095 | 64 | .586p |
65 | dd 8191 | 65 | .mmx |
66 | dd 16383 | 66 | |
67 | dd 32767 | 67 | name inflate_fast_x86 |
68 | dd 65535 | 68 | .MODEL FLAT |
69 | dd 131071 | 69 | |
70 | dd 262143 | 70 | _DATA segment |
71 | dd 524287 | 71 | inflate_fast_use_mmx: |
72 | dd 1048575 | 72 | dd 1 |
73 | dd 2097151 | 73 | |
74 | dd 4194303 | 74 | |
75 | dd 8388607 | 75 | _TEXT segment |
76 | dd 16777215 | 76 | PUBLIC _inflate_fast |
77 | dd 33554431 | 77 | |
78 | dd 67108863 | 78 | ALIGN 4 |
79 | dd 134217727 | 79 | _inflate_fast: |
80 | dd 268435455 | 80 | jmp inflate_fast_entry |
81 | dd 536870911 | 81 | |
82 | dd 1073741823 | 82 | |
83 | dd 2147483647 | 83 | |
84 | dd 4294967295 | 84 | ALIGN 4 |
85 | 85 | db 'Fast decoding Code from Chris Anderson' | |
86 | 86 | db 0 | |
87 | ; head was added in zlib 1.2.2.1, so we add addstr | 87 | |
88 | ; set addstr to 0 with zlib 1.2.1 or below | 88 | ALIGN 4 |
89 | addstr equ 4 | 89 | invalid_literal_length_code_msg: |
90 | 90 | db 'invalid literal/length code' | |
91 | mode_state equ 0 ;/* state->mode */ | 91 | db 0 |
92 | wsize_state equ 32+addstr ;/* state->wsize */ | 92 | |
93 | write_state equ (36+4+addstr) ;/* state->write */ | 93 | ALIGN 4 |
94 | window_state equ (40+4+addstr) ;/* state->window */ | 94 | invalid_distance_code_msg: |
95 | hold_state equ (44+4+addstr) ;/* state->hold */ | 95 | db 'invalid distance code' |
96 | bits_state equ (48+4+addstr) ;/* state->bits */ | 96 | db 0 |
97 | lencode_state equ (64+4+addstr) ;/* state->lencode */ | 97 | |
98 | distcode_state equ (68+4+addstr) ;/* state->distcode */ | 98 | ALIGN 4 |
99 | lenbits_state equ (72+4+addstr) ;/* state->lenbits */ | 99 | invalid_distance_too_far_msg: |
100 | distbits_state equ (76+4+addstr) ;/* state->distbits */ | 100 | db 'invalid distance too far back' |
101 | 101 | db 0 | |
102 | 102 | ||
103 | ;;SECTION .text | 103 | |
104 | ; 205 "inffast.S" | 104 | ALIGN 4 |
105 | ;GLOBAL inflate_fast_use_mmx | 105 | inflate_fast_mask: |
106 | 106 | dd 0 | |
107 | ;SECTION .data | 107 | dd 1 |
108 | 108 | dd 3 | |
109 | 109 | dd 7 | |
110 | ; GLOBAL inflate_fast_use_mmx:object | 110 | dd 15 |
111 | ;.size inflate_fast_use_mmx, 4 | 111 | dd 31 |
112 | ; 226 "inffast.S" | 112 | dd 63 |
113 | ;SECTION .text | 113 | dd 127 |
114 | 114 | dd 255 | |
115 | ALIGN 4 | 115 | dd 511 |
116 | inflate_fast_entry: | 116 | dd 1023 |
117 | push edi | 117 | dd 2047 |
118 | push esi | 118 | dd 4095 |
119 | push ebp | 119 | dd 8191 |
120 | push ebx | 120 | dd 16383 |
121 | pushfd | 121 | dd 32767 |
122 | sub esp,64 | 122 | dd 65535 |
123 | cld | 123 | dd 131071 |
124 | 124 | dd 262143 | |
125 | 125 | dd 524287 | |
126 | 126 | dd 1048575 | |
127 | 127 | dd 2097151 | |
128 | mov esi, [esp+88] | 128 | dd 4194303 |
129 | mov edi, [esi+28] | 129 | dd 8388607 |
130 | 130 | dd 16777215 | |
131 | 131 | dd 33554431 | |
132 | 132 | dd 67108863 | |
133 | 133 | dd 134217727 | |
134 | 134 | dd 268435455 | |
135 | 135 | dd 536870911 | |
136 | 136 | dd 1073741823 | |
137 | mov edx, [esi+4] | 137 | dd 2147483647 |
138 | mov eax, [esi+0] | 138 | dd 4294967295 |
139 | 139 | ||
140 | add edx,eax | 140 | |
141 | sub edx,11 | 141 | mode_state equ 0 ;/* state->mode */ |
142 | 142 | wsize_state equ (32+zlib1222sup) ;/* state->wsize */ | |
143 | mov [esp+44],eax | 143 | write_state equ (36+4+zlib1222sup) ;/* state->write */ |
144 | mov [esp+20],edx | 144 | window_state equ (40+4+zlib1222sup) ;/* state->window */ |
145 | 145 | hold_state equ (44+4+zlib1222sup) ;/* state->hold */ | |
146 | mov ebp, [esp+92] | 146 | bits_state equ (48+4+zlib1222sup) ;/* state->bits */ |
147 | mov ecx, [esi+16] | 147 | lencode_state equ (64+4+zlib1222sup) ;/* state->lencode */ |
148 | mov ebx, [esi+12] | 148 | distcode_state equ (68+4+zlib1222sup) ;/* state->distcode */ |
149 | 149 | lenbits_state equ (72+4+zlib1222sup) ;/* state->lenbits */ | |
150 | sub ebp,ecx | 150 | distbits_state equ (76+4+zlib1222sup) ;/* state->distbits */ |
151 | neg ebp | 151 | |
152 | add ebp,ebx | 152 | |
153 | 153 | ;;SECTION .text | |
154 | sub ecx,257 | 154 | ; 205 "inffast.S" |
155 | add ecx,ebx | 155 | ;GLOBAL inflate_fast_use_mmx |
156 | 156 | ||
157 | mov [esp+60],ebx | 157 | ;SECTION .data |
158 | mov [esp+40],ebp | 158 | |
159 | mov [esp+16],ecx | 159 | |
160 | ; 285 "inffast.S" | 160 | ; GLOBAL inflate_fast_use_mmx:object |
161 | mov eax, [edi+lencode_state] | 161 | ;.size inflate_fast_use_mmx, 4 |
162 | mov ecx, [edi+distcode_state] | 162 | ; 226 "inffast.S" |
163 | 163 | ;SECTION .text | |
164 | mov [esp+8],eax | 164 | |
165 | mov [esp+12],ecx | 165 | ALIGN 4 |
166 | 166 | inflate_fast_entry: | |
167 | mov eax,1 | 167 | push edi |
168 | mov ecx, [edi+lenbits_state] | 168 | push esi |
169 | shl eax,cl | 169 | push ebp |
170 | dec eax | 170 | push ebx |
171 | mov [esp+0],eax | 171 | pushfd |
172 | 172 | sub esp,64 | |
173 | mov eax,1 | 173 | cld |
174 | mov ecx, [edi+distbits_state] | 174 | |
175 | shl eax,cl | 175 | |
176 | dec eax | 176 | |
177 | mov [esp+4],eax | 177 | |
178 | 178 | mov esi, [esp+88] | |
179 | mov eax, [edi+wsize_state] | 179 | mov edi, [esi+28] |
180 | mov ecx, [edi+write_state] | 180 | |
181 | mov edx, [edi+window_state] | 181 | |
182 | 182 | ||
183 | mov [esp+52],eax | 183 | |
184 | mov [esp+48],ecx | 184 | |
185 | mov [esp+56],edx | 185 | |
186 | 186 | ||
187 | mov ebp, [edi+hold_state] | 187 | mov edx, [esi+4] |
188 | mov ebx, [edi+bits_state] | 188 | mov eax, [esi+0] |
189 | ; 321 "inffast.S" | 189 | |
190 | mov esi, [esp+44] | 190 | add edx,eax |
191 | mov ecx, [esp+20] | 191 | sub edx,11 |
192 | cmp ecx,esi | 192 | |
193 | ja L_align_long | 193 | mov [esp+44],eax |
194 | 194 | mov [esp+20],edx | |
195 | add ecx,11 | 195 | |
196 | sub ecx,esi | 196 | mov ebp, [esp+92] |
197 | mov eax,12 | 197 | mov ecx, [esi+16] |
198 | sub eax,ecx | 198 | mov ebx, [esi+12] |
199 | lea edi, [esp+28] | 199 | |
200 | rep movsb | 200 | sub ebp,ecx |
201 | mov ecx,eax | 201 | neg ebp |
202 | xor eax,eax | 202 | add ebp,ebx |
203 | rep stosb | 203 | |
204 | lea esi, [esp+28] | 204 | sub ecx,257 |
205 | mov [esp+20],esi | 205 | add ecx,ebx |
206 | jmp L_is_aligned | 206 | |
207 | 207 | mov [esp+60],ebx | |
208 | 208 | mov [esp+40],ebp | |
209 | L_align_long: | 209 | mov [esp+16],ecx |
210 | test esi,3 | 210 | ; 285 "inffast.S" |
211 | jz L_is_aligned | 211 | mov eax, [edi+lencode_state] |
212 | xor eax,eax | 212 | mov ecx, [edi+distcode_state] |
213 | mov al, [esi] | 213 | |
214 | inc esi | 214 | mov [esp+8],eax |
215 | mov ecx,ebx | 215 | mov [esp+12],ecx |
216 | add ebx,8 | 216 | |
217 | shl eax,cl | 217 | mov eax,1 |
218 | or ebp,eax | 218 | mov ecx, [edi+lenbits_state] |
219 | jmp L_align_long | 219 | shl eax,cl |
220 | 220 | dec eax | |
221 | L_is_aligned: | 221 | mov [esp+0],eax |
222 | mov edi, [esp+60] | 222 | |
223 | ; 366 "inffast.S" | 223 | mov eax,1 |
224 | L_check_mmx: | 224 | mov ecx, [edi+distbits_state] |
225 | cmp dword ptr [inflate_fast_use_mmx],2 | 225 | shl eax,cl |
226 | je L_init_mmx | 226 | dec eax |
227 | ja L_do_loop | 227 | mov [esp+4],eax |
228 | 228 | ||
229 | push eax | 229 | mov eax, [edi+wsize_state] |
230 | push ebx | 230 | mov ecx, [edi+write_state] |
231 | push ecx | 231 | mov edx, [edi+window_state] |
232 | push edx | 232 | |
233 | pushfd | 233 | mov [esp+52],eax |
234 | mov eax, [esp] | 234 | mov [esp+48],ecx |
235 | xor dword ptr [esp],0200000h | 235 | mov [esp+56],edx |
236 | 236 | ||
237 | 237 | mov ebp, [edi+hold_state] | |
238 | 238 | mov ebx, [edi+bits_state] | |
239 | 239 | ; 321 "inffast.S" | |
240 | popfd | 240 | mov esi, [esp+44] |
241 | pushfd | 241 | mov ecx, [esp+20] |
242 | pop edx | 242 | cmp ecx,esi |
243 | xor edx,eax | 243 | ja L_align_long |
244 | jz L_dont_use_mmx | 244 | |
245 | xor eax,eax | 245 | add ecx,11 |
246 | cpuid | 246 | sub ecx,esi |
247 | cmp ebx,0756e6547h | 247 | mov eax,12 |
248 | jne L_dont_use_mmx | 248 | sub eax,ecx |
249 | cmp ecx,06c65746eh | 249 | lea edi, [esp+28] |
250 | jne L_dont_use_mmx | 250 | rep movsb |
251 | cmp edx,049656e69h | 251 | mov ecx,eax |
252 | jne L_dont_use_mmx | 252 | xor eax,eax |
253 | mov eax,1 | 253 | rep stosb |
254 | cpuid | 254 | lea esi, [esp+28] |
255 | shr eax,8 | 255 | mov [esp+20],esi |
256 | and eax,15 | 256 | jmp L_is_aligned |
257 | cmp eax,6 | 257 | |
258 | jne L_dont_use_mmx | 258 | |
259 | test edx,0800000h | 259 | L_align_long: |
260 | jnz L_use_mmx | 260 | test esi,3 |
261 | jmp L_dont_use_mmx | 261 | jz L_is_aligned |
262 | L_use_mmx: | 262 | xor eax,eax |
263 | mov dword ptr [inflate_fast_use_mmx],2 | 263 | mov al, [esi] |
264 | jmp L_check_mmx_pop | 264 | inc esi |
265 | L_dont_use_mmx: | 265 | mov ecx,ebx |
266 | mov dword ptr [inflate_fast_use_mmx],3 | 266 | add ebx,8 |
267 | L_check_mmx_pop: | 267 | shl eax,cl |
268 | pop edx | 268 | or ebp,eax |
269 | pop ecx | 269 | jmp L_align_long |
270 | pop ebx | 270 | |
271 | pop eax | 271 | L_is_aligned: |
272 | jmp L_check_mmx | 272 | mov edi, [esp+60] |
273 | ; 426 "inffast.S" | 273 | ; 366 "inffast.S" |
274 | ALIGN 4 | 274 | L_check_mmx: |
275 | L_do_loop: | 275 | cmp dword ptr [inflate_fast_use_mmx],2 |
276 | ; 437 "inffast.S" | 276 | je L_init_mmx |
277 | cmp bl,15 | 277 | ja L_do_loop |
278 | ja L_get_length_code | 278 | |
279 | 279 | push eax | |
280 | xor eax,eax | 280 | push ebx |
281 | lodsw | 281 | push ecx |
282 | mov cl,bl | 282 | push edx |
283 | add bl,16 | 283 | pushfd |
284 | shl eax,cl | 284 | mov eax, [esp] |
285 | or ebp,eax | 285 | xor dword ptr [esp],0200000h |
286 | 286 | ||
287 | L_get_length_code: | 287 | |
288 | mov edx, [esp+0] | 288 | |
289 | mov ecx, [esp+8] | 289 | |
290 | and edx,ebp | 290 | popfd |
291 | mov eax, [ecx+edx*4] | 291 | pushfd |
292 | 292 | pop edx | |
293 | L_dolen: | 293 | xor edx,eax |
294 | 294 | jz L_dont_use_mmx | |
295 | 295 | xor eax,eax | |
296 | 296 | cpuid | |
297 | 297 | cmp ebx,0756e6547h | |
298 | 298 | jne L_dont_use_mmx | |
299 | 299 | cmp ecx,06c65746eh | |
300 | mov cl,ah | 300 | jne L_dont_use_mmx |
301 | sub bl,ah | 301 | cmp edx,049656e69h |
302 | shr ebp,cl | 302 | jne L_dont_use_mmx |
303 | 303 | mov eax,1 | |
304 | 304 | cpuid | |
305 | 305 | shr eax,8 | |
306 | 306 | and eax,15 | |
307 | 307 | cmp eax,6 | |
308 | 308 | jne L_dont_use_mmx | |
309 | test al,al | 309 | test edx,0800000h |
310 | jnz L_test_for_length_base | 310 | jnz L_use_mmx |
311 | 311 | jmp L_dont_use_mmx | |
312 | shr eax,16 | 312 | L_use_mmx: |
313 | stosb | 313 | mov dword ptr [inflate_fast_use_mmx],2 |
314 | 314 | jmp L_check_mmx_pop | |
315 | L_while_test: | 315 | L_dont_use_mmx: |
316 | 316 | mov dword ptr [inflate_fast_use_mmx],3 | |
317 | 317 | L_check_mmx_pop: | |
318 | cmp [esp+16],edi | 318 | pop edx |
319 | jbe L_break_loop | 319 | pop ecx |
320 | 320 | pop ebx | |
321 | cmp [esp+20],esi | 321 | pop eax |
322 | ja L_do_loop | 322 | jmp L_check_mmx |
323 | jmp L_break_loop | 323 | ; 426 "inffast.S" |
324 | 324 | ALIGN 4 | |
325 | L_test_for_length_base: | 325 | L_do_loop: |
326 | ; 502 "inffast.S" | 326 | ; 437 "inffast.S" |
327 | mov edx,eax | 327 | cmp bl,15 |
328 | shr edx,16 | 328 | ja L_get_length_code |
329 | mov cl,al | 329 | |
330 | 330 | xor eax,eax | |
331 | test al,16 | 331 | lodsw |
332 | jz L_test_for_second_level_length | 332 | mov cl,bl |
333 | and cl,15 | 333 | add bl,16 |
334 | jz L_save_len | 334 | shl eax,cl |
335 | cmp bl,cl | 335 | or ebp,eax |
336 | jae L_add_bits_to_len | 336 | |
337 | 337 | L_get_length_code: | |
338 | mov ch,cl | 338 | mov edx, [esp+0] |
339 | xor eax,eax | 339 | mov ecx, [esp+8] |
340 | lodsw | 340 | and edx,ebp |
341 | mov cl,bl | 341 | mov eax, [ecx+edx*4] |
342 | add bl,16 | 342 | |
343 | shl eax,cl | 343 | L_dolen: |
344 | or ebp,eax | 344 | |
345 | mov cl,ch | 345 | |
346 | 346 | ||
347 | L_add_bits_to_len: | 347 | |
348 | mov eax,1 | 348 | |
349 | shl eax,cl | 349 | |
350 | dec eax | 350 | mov cl,ah |
351 | sub bl,cl | 351 | sub bl,ah |
352 | and eax,ebp | 352 | shr ebp,cl |
353 | shr ebp,cl | 353 | |
354 | add edx,eax | 354 | |
355 | 355 | ||
356 | L_save_len: | 356 | |
357 | mov [esp+24],edx | 357 | |
358 | 358 | ||
359 | 359 | test al,al | |
360 | L_decode_distance: | 360 | jnz L_test_for_length_base |
361 | ; 549 "inffast.S" | 361 | |
362 | cmp bl,15 | 362 | shr eax,16 |
363 | ja L_get_distance_code | 363 | stosb |
364 | 364 | ||
365 | xor eax,eax | 365 | L_while_test: |
366 | lodsw | 366 | |
367 | mov cl,bl | 367 | |
368 | add bl,16 | 368 | cmp [esp+16],edi |
369 | shl eax,cl | 369 | jbe L_break_loop |
370 | or ebp,eax | 370 | |
371 | 371 | cmp [esp+20],esi | |
372 | L_get_distance_code: | 372 | ja L_do_loop |
373 | mov edx, [esp+4] | 373 | jmp L_break_loop |
374 | mov ecx, [esp+12] | 374 | |
375 | and edx,ebp | 375 | L_test_for_length_base: |
376 | mov eax, [ecx+edx*4] | 376 | ; 502 "inffast.S" |
377 | 377 | mov edx,eax | |
378 | 378 | shr edx,16 | |
379 | L_dodist: | 379 | mov cl,al |
380 | mov edx,eax | 380 | |
381 | shr edx,16 | 381 | test al,16 |
382 | mov cl,ah | 382 | jz L_test_for_second_level_length |
383 | sub bl,ah | 383 | and cl,15 |
384 | shr ebp,cl | 384 | jz L_save_len |
385 | ; 584 "inffast.S" | 385 | cmp bl,cl |
386 | mov cl,al | 386 | jae L_add_bits_to_len |
387 | 387 | ||
388 | test al,16 | 388 | mov ch,cl |
389 | jz L_test_for_second_level_dist | 389 | xor eax,eax |
390 | and cl,15 | 390 | lodsw |
391 | jz L_check_dist_one | 391 | mov cl,bl |
392 | cmp bl,cl | 392 | add bl,16 |
393 | jae L_add_bits_to_dist | 393 | shl eax,cl |
394 | 394 | or ebp,eax | |
395 | mov ch,cl | 395 | mov cl,ch |
396 | xor eax,eax | 396 | |
397 | lodsw | 397 | L_add_bits_to_len: |
398 | mov cl,bl | 398 | mov eax,1 |
399 | add bl,16 | 399 | shl eax,cl |
400 | shl eax,cl | 400 | dec eax |
401 | or ebp,eax | 401 | sub bl,cl |
402 | mov cl,ch | 402 | and eax,ebp |
403 | 403 | shr ebp,cl | |
404 | L_add_bits_to_dist: | 404 | add edx,eax |
405 | mov eax,1 | 405 | |
406 | shl eax,cl | 406 | L_save_len: |
407 | dec eax | 407 | mov [esp+24],edx |
408 | sub bl,cl | 408 | |
409 | and eax,ebp | 409 | |
410 | shr ebp,cl | 410 | L_decode_distance: |
411 | add edx,eax | 411 | ; 549 "inffast.S" |
412 | jmp L_check_window | 412 | cmp bl,15 |
413 | 413 | ja L_get_distance_code | |
414 | L_check_window: | 414 | |
415 | ; 625 "inffast.S" | 415 | xor eax,eax |
416 | mov [esp+44],esi | 416 | lodsw |
417 | mov eax,edi | 417 | mov cl,bl |
418 | sub eax, [esp+40] | 418 | add bl,16 |
419 | 419 | shl eax,cl | |
420 | cmp eax,edx | 420 | or ebp,eax |
421 | jb L_clip_window | 421 | |
422 | 422 | L_get_distance_code: | |
423 | mov ecx, [esp+24] | 423 | mov edx, [esp+4] |
424 | mov esi,edi | 424 | mov ecx, [esp+12] |
425 | sub esi,edx | 425 | and edx,ebp |
426 | 426 | mov eax, [ecx+edx*4] | |
427 | sub ecx,3 | 427 | |
428 | mov al, [esi] | 428 | |
429 | mov [edi],al | 429 | L_dodist: |
430 | mov al, [esi+1] | 430 | mov edx,eax |
431 | mov dl, [esi+2] | 431 | shr edx,16 |
432 | add esi,3 | 432 | mov cl,ah |
433 | mov [edi+1],al | 433 | sub bl,ah |
434 | mov [edi+2],dl | 434 | shr ebp,cl |
435 | add edi,3 | 435 | ; 584 "inffast.S" |
436 | rep movsb | 436 | mov cl,al |
437 | 437 | ||
438 | mov esi, [esp+44] | 438 | test al,16 |
439 | jmp L_while_test | 439 | jz L_test_for_second_level_dist |
440 | 440 | and cl,15 | |
441 | ALIGN 4 | 441 | jz L_check_dist_one |
442 | L_check_dist_one: | 442 | cmp bl,cl |
443 | cmp edx,1 | 443 | jae L_add_bits_to_dist |
444 | jne L_check_window | 444 | |
445 | cmp [esp+40],edi | 445 | mov ch,cl |
446 | je L_check_window | 446 | xor eax,eax |
447 | 447 | lodsw | |
448 | dec edi | 448 | mov cl,bl |
449 | mov ecx, [esp+24] | 449 | add bl,16 |
450 | mov al, [edi] | 450 | shl eax,cl |
451 | sub ecx,3 | 451 | or ebp,eax |
452 | 452 | mov cl,ch | |
453 | mov [edi+1],al | 453 | |
454 | mov [edi+2],al | 454 | L_add_bits_to_dist: |
455 | mov [edi+3],al | 455 | mov eax,1 |
456 | add edi,4 | 456 | shl eax,cl |
457 | rep stosb | 457 | dec eax |
458 | 458 | sub bl,cl | |
459 | jmp L_while_test | 459 | and eax,ebp |
460 | 460 | shr ebp,cl | |
461 | ALIGN 4 | 461 | add edx,eax |
462 | L_test_for_second_level_length: | 462 | jmp L_check_window |
463 | 463 | ||
464 | 464 | L_check_window: | |
465 | 465 | ; 625 "inffast.S" | |
466 | 466 | mov [esp+44],esi | |
467 | test al,64 | 467 | mov eax,edi |
468 | jnz L_test_for_end_of_block | 468 | sub eax, [esp+40] |
469 | 469 | ||
470 | mov eax,1 | 470 | cmp eax,edx |
471 | shl eax,cl | 471 | jb L_clip_window |
472 | dec eax | 472 | |
473 | and eax,ebp | 473 | mov ecx, [esp+24] |
474 | add eax,edx | 474 | mov esi,edi |
475 | mov edx, [esp+8] | 475 | sub esi,edx |
476 | mov eax, [edx+eax*4] | 476 | |
477 | jmp L_dolen | 477 | sub ecx,3 |
478 | 478 | mov al, [esi] | |
479 | ALIGN 4 | 479 | mov [edi],al |
480 | L_test_for_second_level_dist: | 480 | mov al, [esi+1] |
481 | 481 | mov dl, [esi+2] | |
482 | 482 | add esi,3 | |
483 | 483 | mov [edi+1],al | |
484 | 484 | mov [edi+2],dl | |
485 | test al,64 | 485 | add edi,3 |
486 | jnz L_invalid_distance_code | 486 | rep movsb |
487 | 487 | ||
488 | mov eax,1 | 488 | mov esi, [esp+44] |
489 | shl eax,cl | 489 | jmp L_while_test |
490 | dec eax | 490 | |
491 | and eax,ebp | 491 | ALIGN 4 |
492 | add eax,edx | 492 | L_check_dist_one: |
493 | mov edx, [esp+12] | 493 | cmp edx,1 |
494 | mov eax, [edx+eax*4] | 494 | jne L_check_window |
495 | jmp L_dodist | 495 | cmp [esp+40],edi |
496 | 496 | je L_check_window | |
497 | ALIGN 4 | 497 | |
498 | L_clip_window: | 498 | dec edi |
499 | ; 721 "inffast.S" | 499 | mov ecx, [esp+24] |
500 | mov ecx,eax | 500 | mov al, [edi] |
501 | mov eax, [esp+52] | 501 | sub ecx,3 |
502 | neg ecx | 502 | |
503 | mov esi, [esp+56] | 503 | mov [edi+1],al |
504 | 504 | mov [edi+2],al | |
505 | cmp eax,edx | 505 | mov [edi+3],al |
506 | jb L_invalid_distance_too_far | 506 | add edi,4 |
507 | 507 | rep stosb | |
508 | add ecx,edx | 508 | |
509 | cmp dword ptr [esp+48],0 | 509 | jmp L_while_test |
510 | jne L_wrap_around_window | 510 | |
511 | 511 | ALIGN 4 | |
512 | sub eax,ecx | 512 | L_test_for_second_level_length: |
513 | add esi,eax | 513 | |
514 | ; 749 "inffast.S" | 514 | |
515 | mov eax, [esp+24] | 515 | |
516 | cmp eax,ecx | 516 | |
517 | jbe L_do_copy1 | 517 | test al,64 |
518 | 518 | jnz L_test_for_end_of_block | |
519 | sub eax,ecx | 519 | |
520 | rep movsb | 520 | mov eax,1 |
521 | mov esi,edi | 521 | shl eax,cl |
522 | sub esi,edx | 522 | dec eax |
523 | jmp L_do_copy1 | 523 | and eax,ebp |
524 | 524 | add eax,edx | |
525 | cmp eax,ecx | 525 | mov edx, [esp+8] |
526 | jbe L_do_copy1 | 526 | mov eax, [edx+eax*4] |
527 | 527 | jmp L_dolen | |
528 | sub eax,ecx | 528 | |
529 | rep movsb | 529 | ALIGN 4 |
530 | mov esi,edi | 530 | L_test_for_second_level_dist: |
531 | sub esi,edx | 531 | |
532 | jmp L_do_copy1 | 532 | |
533 | 533 | ||
534 | L_wrap_around_window: | 534 | |
535 | ; 793 "inffast.S" | 535 | test al,64 |
536 | mov eax, [esp+48] | 536 | jnz L_invalid_distance_code |
537 | cmp ecx,eax | 537 | |
538 | jbe L_contiguous_in_window | 538 | mov eax,1 |
539 | 539 | shl eax,cl | |
540 | add esi, [esp+52] | 540 | dec eax |
541 | add esi,eax | 541 | and eax,ebp |
542 | sub esi,ecx | 542 | add eax,edx |
543 | sub ecx,eax | 543 | mov edx, [esp+12] |
544 | 544 | mov eax, [edx+eax*4] | |
545 | 545 | jmp L_dodist | |
546 | mov eax, [esp+24] | 546 | |
547 | cmp eax,ecx | 547 | ALIGN 4 |
548 | jbe L_do_copy1 | 548 | L_clip_window: |
549 | 549 | ; 721 "inffast.S" | |
550 | sub eax,ecx | 550 | mov ecx,eax |
551 | rep movsb | 551 | mov eax, [esp+52] |
552 | mov esi, [esp+56] | 552 | neg ecx |
553 | mov ecx, [esp+48] | 553 | mov esi, [esp+56] |
554 | cmp eax,ecx | 554 | |
555 | jbe L_do_copy1 | 555 | cmp eax,edx |
556 | 556 | jb L_invalid_distance_too_far | |
557 | sub eax,ecx | 557 | |
558 | rep movsb | 558 | add ecx,edx |
559 | mov esi,edi | 559 | cmp dword ptr [esp+48],0 |
560 | sub esi,edx | 560 | jne L_wrap_around_window |
561 | jmp L_do_copy1 | 561 | |
562 | 562 | sub eax,ecx | |
563 | L_contiguous_in_window: | 563 | add esi,eax |
564 | ; 836 "inffast.S" | 564 | ; 749 "inffast.S" |
565 | add esi,eax | 565 | mov eax, [esp+24] |
566 | sub esi,ecx | 566 | cmp eax,ecx |
567 | 567 | jbe L_do_copy1 | |
568 | 568 | ||
569 | mov eax, [esp+24] | 569 | sub eax,ecx |
570 | cmp eax,ecx | 570 | rep movsb |
571 | jbe L_do_copy1 | 571 | mov esi,edi |
572 | 572 | sub esi,edx | |
573 | sub eax,ecx | 573 | jmp L_do_copy1 |
574 | rep movsb | 574 | |
575 | mov esi,edi | 575 | cmp eax,ecx |
576 | sub esi,edx | 576 | jbe L_do_copy1 |
577 | 577 | ||
578 | L_do_copy1: | 578 | sub eax,ecx |
579 | ; 862 "inffast.S" | 579 | rep movsb |
580 | mov ecx,eax | 580 | mov esi,edi |
581 | rep movsb | 581 | sub esi,edx |
582 | 582 | jmp L_do_copy1 | |
583 | mov esi, [esp+44] | 583 | |
584 | jmp L_while_test | 584 | L_wrap_around_window: |
585 | ; 878 "inffast.S" | 585 | ; 793 "inffast.S" |
586 | ALIGN 4 | 586 | mov eax, [esp+48] |
587 | L_init_mmx: | 587 | cmp ecx,eax |
588 | emms | 588 | jbe L_contiguous_in_window |
589 | 589 | ||
590 | 590 | add esi, [esp+52] | |
591 | 591 | add esi,eax | |
592 | 592 | sub esi,ecx | |
593 | 593 | sub ecx,eax | |
594 | movd mm0,ebp | 594 | |
595 | mov ebp,ebx | 595 | |
596 | ; 896 "inffast.S" | 596 | mov eax, [esp+24] |
597 | movd mm4,[esp+0] | 597 | cmp eax,ecx |
598 | movq mm3,mm4 | 598 | jbe L_do_copy1 |
599 | movd mm5,[esp+4] | 599 | |
600 | movq mm2,mm5 | 600 | sub eax,ecx |
601 | pxor mm1,mm1 | 601 | rep movsb |
602 | mov ebx, [esp+8] | 602 | mov esi, [esp+56] |
603 | jmp L_do_loop_mmx | 603 | mov ecx, [esp+48] |
604 | 604 | cmp eax,ecx | |
605 | ALIGN 4 | 605 | jbe L_do_copy1 |
606 | L_do_loop_mmx: | 606 | |
607 | psrlq mm0,mm1 | 607 | sub eax,ecx |
608 | 608 | rep movsb | |
609 | cmp ebp,32 | 609 | mov esi,edi |
610 | ja L_get_length_code_mmx | 610 | sub esi,edx |
611 | 611 | jmp L_do_copy1 | |
612 | movd mm6,ebp | 612 | |
613 | movd mm7,[esi] | 613 | L_contiguous_in_window: |
614 | add esi,4 | 614 | ; 836 "inffast.S" |
615 | psllq mm7,mm6 | 615 | add esi,eax |
616 | add ebp,32 | 616 | sub esi,ecx |
617 | por mm0,mm7 | 617 | |
618 | 618 | ||
619 | L_get_length_code_mmx: | 619 | mov eax, [esp+24] |
620 | pand mm4,mm0 | 620 | cmp eax,ecx |
621 | movd eax,mm4 | 621 | jbe L_do_copy1 |
622 | movq mm4,mm3 | 622 | |
623 | mov eax, [ebx+eax*4] | 623 | sub eax,ecx |
624 | 624 | rep movsb | |
625 | L_dolen_mmx: | 625 | mov esi,edi |
626 | movzx ecx,ah | 626 | sub esi,edx |
627 | movd mm1,ecx | 627 | |
628 | sub ebp,ecx | 628 | L_do_copy1: |
629 | 629 | ; 862 "inffast.S" | |
630 | test al,al | 630 | mov ecx,eax |
631 | jnz L_test_for_length_base_mmx | 631 | rep movsb |
632 | 632 | ||
633 | shr eax,16 | 633 | mov esi, [esp+44] |
634 | stosb | 634 | jmp L_while_test |
635 | 635 | ; 878 "inffast.S" | |
636 | L_while_test_mmx: | 636 | ALIGN 4 |
637 | 637 | L_init_mmx: | |
638 | 638 | emms | |
639 | cmp [esp+16],edi | 639 | |
640 | jbe L_break_loop | 640 | |
641 | 641 | ||
642 | cmp [esp+20],esi | 642 | |
643 | ja L_do_loop_mmx | 643 | |
644 | jmp L_break_loop | 644 | movd mm0,ebp |
645 | 645 | mov ebp,ebx | |
646 | L_test_for_length_base_mmx: | 646 | ; 896 "inffast.S" |
647 | 647 | movd mm4,[esp+0] | |
648 | mov edx,eax | 648 | movq mm3,mm4 |
649 | shr edx,16 | 649 | movd mm5,[esp+4] |
650 | 650 | movq mm2,mm5 | |
651 | test al,16 | 651 | pxor mm1,mm1 |
652 | jz L_test_for_second_level_length_mmx | 652 | mov ebx, [esp+8] |
653 | and eax,15 | 653 | jmp L_do_loop_mmx |
654 | jz L_decode_distance_mmx | 654 | |
655 | 655 | ALIGN 4 | |
656 | psrlq mm0,mm1 | 656 | L_do_loop_mmx: |
657 | movd mm1,eax | 657 | psrlq mm0,mm1 |
658 | movd ecx,mm0 | 658 | |
659 | sub ebp,eax | 659 | cmp ebp,32 |
660 | and ecx, [inflate_fast_mask+eax*4] | 660 | ja L_get_length_code_mmx |
661 | add edx,ecx | 661 | |
662 | 662 | movd mm6,ebp | |
663 | L_decode_distance_mmx: | 663 | movd mm7,[esi] |
664 | psrlq mm0,mm1 | 664 | add esi,4 |
665 | 665 | psllq mm7,mm6 | |
666 | cmp ebp,32 | 666 | add ebp,32 |
667 | ja L_get_dist_code_mmx | 667 | por mm0,mm7 |
668 | 668 | ||
669 | movd mm6,ebp | 669 | L_get_length_code_mmx: |
670 | movd mm7,[esi] | 670 | pand mm4,mm0 |
671 | add esi,4 | 671 | movd eax,mm4 |
672 | psllq mm7,mm6 | 672 | movq mm4,mm3 |
673 | add ebp,32 | 673 | mov eax, [ebx+eax*4] |
674 | por mm0,mm7 | 674 | |
675 | 675 | L_dolen_mmx: | |
676 | L_get_dist_code_mmx: | 676 | movzx ecx,ah |
677 | mov ebx, [esp+12] | 677 | movd mm1,ecx |
678 | pand mm5,mm0 | 678 | sub ebp,ecx |
679 | movd eax,mm5 | 679 | |
680 | movq mm5,mm2 | 680 | test al,al |
681 | mov eax, [ebx+eax*4] | 681 | jnz L_test_for_length_base_mmx |
682 | 682 | ||
683 | L_dodist_mmx: | 683 | shr eax,16 |
684 | 684 | stosb | |
685 | movzx ecx,ah | 685 | |
686 | mov ebx,eax | 686 | L_while_test_mmx: |
687 | shr ebx,16 | 687 | |
688 | sub ebp,ecx | 688 | |
689 | movd mm1,ecx | 689 | cmp [esp+16],edi |
690 | 690 | jbe L_break_loop | |
691 | test al,16 | 691 | |
692 | jz L_test_for_second_level_dist_mmx | 692 | cmp [esp+20],esi |
693 | and eax,15 | 693 | ja L_do_loop_mmx |
694 | jz L_check_dist_one_mmx | 694 | jmp L_break_loop |
695 | 695 | ||
696 | L_add_bits_to_dist_mmx: | 696 | L_test_for_length_base_mmx: |
697 | psrlq mm0,mm1 | 697 | |
698 | movd mm1,eax | 698 | mov edx,eax |
699 | movd ecx,mm0 | 699 | shr edx,16 |
700 | sub ebp,eax | 700 | |
701 | and ecx, [inflate_fast_mask+eax*4] | 701 | test al,16 |
702 | add ebx,ecx | 702 | jz L_test_for_second_level_length_mmx |
703 | 703 | and eax,15 | |
704 | L_check_window_mmx: | 704 | jz L_decode_distance_mmx |
705 | mov [esp+44],esi | 705 | |
706 | mov eax,edi | 706 | psrlq mm0,mm1 |
707 | sub eax, [esp+40] | 707 | movd mm1,eax |
708 | 708 | movd ecx,mm0 | |
709 | cmp eax,ebx | 709 | sub ebp,eax |
710 | jb L_clip_window_mmx | 710 | and ecx, [inflate_fast_mask+eax*4] |
711 | 711 | add edx,ecx | |
712 | mov ecx,edx | 712 | |
713 | mov esi,edi | 713 | L_decode_distance_mmx: |
714 | sub esi,ebx | 714 | psrlq mm0,mm1 |
715 | 715 | ||
716 | sub ecx,3 | 716 | cmp ebp,32 |
717 | mov al, [esi] | 717 | ja L_get_dist_code_mmx |
718 | mov [edi],al | 718 | |
719 | mov al, [esi+1] | 719 | movd mm6,ebp |
720 | mov dl, [esi+2] | 720 | movd mm7,[esi] |
721 | add esi,3 | 721 | add esi,4 |
722 | mov [edi+1],al | 722 | psllq mm7,mm6 |
723 | mov [edi+2],dl | 723 | add ebp,32 |
724 | add edi,3 | 724 | por mm0,mm7 |
725 | rep movsb | 725 | |
726 | 726 | L_get_dist_code_mmx: | |
727 | mov esi, [esp+44] | 727 | mov ebx, [esp+12] |
728 | mov ebx, [esp+8] | 728 | pand mm5,mm0 |
729 | jmp L_while_test_mmx | 729 | movd eax,mm5 |
730 | 730 | movq mm5,mm2 | |
731 | ALIGN 4 | 731 | mov eax, [ebx+eax*4] |
732 | L_check_dist_one_mmx: | 732 | |
733 | cmp ebx,1 | 733 | L_dodist_mmx: |
734 | jne L_check_window_mmx | 734 | |
735 | cmp [esp+40],edi | 735 | movzx ecx,ah |
736 | je L_check_window_mmx | 736 | mov ebx,eax |
737 | 737 | shr ebx,16 | |
738 | dec edi | 738 | sub ebp,ecx |
739 | mov ecx,edx | 739 | movd mm1,ecx |
740 | mov al, [edi] | 740 | |
741 | sub ecx,3 | 741 | test al,16 |
742 | 742 | jz L_test_for_second_level_dist_mmx | |
743 | mov [edi+1],al | 743 | and eax,15 |
744 | mov [edi+2],al | 744 | jz L_check_dist_one_mmx |
745 | mov [edi+3],al | 745 | |
746 | add edi,4 | 746 | L_add_bits_to_dist_mmx: |
747 | rep stosb | 747 | psrlq mm0,mm1 |
748 | 748 | movd mm1,eax | |
749 | mov ebx, [esp+8] | 749 | movd ecx,mm0 |
750 | jmp L_while_test_mmx | 750 | sub ebp,eax |
751 | 751 | and ecx, [inflate_fast_mask+eax*4] | |
752 | ALIGN 4 | 752 | add ebx,ecx |
753 | L_test_for_second_level_length_mmx: | 753 | |
754 | test al,64 | 754 | L_check_window_mmx: |
755 | jnz L_test_for_end_of_block | 755 | mov [esp+44],esi |
756 | 756 | mov eax,edi | |
757 | and eax,15 | 757 | sub eax, [esp+40] |
758 | psrlq mm0,mm1 | 758 | |
759 | movd ecx,mm0 | 759 | cmp eax,ebx |
760 | and ecx, [inflate_fast_mask+eax*4] | 760 | jb L_clip_window_mmx |
761 | add ecx,edx | 761 | |
762 | mov eax, [ebx+ecx*4] | 762 | mov ecx,edx |
763 | jmp L_dolen_mmx | 763 | mov esi,edi |
764 | 764 | sub esi,ebx | |
765 | ALIGN 4 | 765 | |
766 | L_test_for_second_level_dist_mmx: | 766 | sub ecx,3 |
767 | test al,64 | 767 | mov al, [esi] |
768 | jnz L_invalid_distance_code | 768 | mov [edi],al |
769 | 769 | mov al, [esi+1] | |
770 | and eax,15 | 770 | mov dl, [esi+2] |
771 | psrlq mm0,mm1 | 771 | add esi,3 |
772 | movd ecx,mm0 | 772 | mov [edi+1],al |
773 | and ecx, [inflate_fast_mask+eax*4] | 773 | mov [edi+2],dl |
774 | mov eax, [esp+12] | 774 | add edi,3 |
775 | add ecx,ebx | 775 | rep movsb |
776 | mov eax, [eax+ecx*4] | 776 | |
777 | jmp L_dodist_mmx | 777 | mov esi, [esp+44] |
778 | 778 | mov ebx, [esp+8] | |
779 | ALIGN 4 | 779 | jmp L_while_test_mmx |
780 | L_clip_window_mmx: | 780 | |
781 | 781 | ALIGN 4 | |
782 | mov ecx,eax | 782 | L_check_dist_one_mmx: |
783 | mov eax, [esp+52] | 783 | cmp ebx,1 |
784 | neg ecx | 784 | jne L_check_window_mmx |
785 | mov esi, [esp+56] | 785 | cmp [esp+40],edi |
786 | 786 | je L_check_window_mmx | |
787 | cmp eax,ebx | 787 | |
788 | jb L_invalid_distance_too_far | 788 | dec edi |
789 | 789 | mov ecx,edx | |
790 | add ecx,ebx | 790 | mov al, [edi] |
791 | cmp dword ptr [esp+48],0 | 791 | sub ecx,3 |
792 | jne L_wrap_around_window_mmx | 792 | |
793 | 793 | mov [edi+1],al | |
794 | sub eax,ecx | 794 | mov [edi+2],al |
795 | add esi,eax | 795 | mov [edi+3],al |
796 | 796 | add edi,4 | |
797 | cmp edx,ecx | 797 | rep stosb |
798 | jbe L_do_copy1_mmx | 798 | |
799 | 799 | mov ebx, [esp+8] | |
800 | sub edx,ecx | 800 | jmp L_while_test_mmx |
801 | rep movsb | 801 | |
802 | mov esi,edi | 802 | ALIGN 4 |
803 | sub esi,ebx | 803 | L_test_for_second_level_length_mmx: |
804 | jmp L_do_copy1_mmx | 804 | test al,64 |
805 | 805 | jnz L_test_for_end_of_block | |
806 | cmp edx,ecx | 806 | |
807 | jbe L_do_copy1_mmx | 807 | and eax,15 |
808 | 808 | psrlq mm0,mm1 | |
809 | sub edx,ecx | 809 | movd ecx,mm0 |
810 | rep movsb | 810 | and ecx, [inflate_fast_mask+eax*4] |
811 | mov esi,edi | 811 | add ecx,edx |
812 | sub esi,ebx | 812 | mov eax, [ebx+ecx*4] |
813 | jmp L_do_copy1_mmx | 813 | jmp L_dolen_mmx |
814 | 814 | ||
815 | L_wrap_around_window_mmx: | 815 | ALIGN 4 |
816 | 816 | L_test_for_second_level_dist_mmx: | |
817 | mov eax, [esp+48] | 817 | test al,64 |
818 | cmp ecx,eax | 818 | jnz L_invalid_distance_code |
819 | jbe L_contiguous_in_window_mmx | 819 | |
820 | 820 | and eax,15 | |
821 | add esi, [esp+52] | 821 | psrlq mm0,mm1 |
822 | add esi,eax | 822 | movd ecx,mm0 |
823 | sub esi,ecx | 823 | and ecx, [inflate_fast_mask+eax*4] |
824 | sub ecx,eax | 824 | mov eax, [esp+12] |
825 | 825 | add ecx,ebx | |
826 | 826 | mov eax, [eax+ecx*4] | |
827 | cmp edx,ecx | 827 | jmp L_dodist_mmx |
828 | jbe L_do_copy1_mmx | 828 | |
829 | 829 | ALIGN 4 | |
830 | sub edx,ecx | 830 | L_clip_window_mmx: |
831 | rep movsb | 831 | |
832 | mov esi, [esp+56] | 832 | mov ecx,eax |
833 | mov ecx, [esp+48] | 833 | mov eax, [esp+52] |
834 | cmp edx,ecx | 834 | neg ecx |
835 | jbe L_do_copy1_mmx | 835 | mov esi, [esp+56] |
836 | 836 | ||
837 | sub edx,ecx | 837 | cmp eax,ebx |
838 | rep movsb | 838 | jb L_invalid_distance_too_far |
839 | mov esi,edi | 839 | |
840 | sub esi,ebx | 840 | add ecx,ebx |
841 | jmp L_do_copy1_mmx | 841 | cmp dword ptr [esp+48],0 |
842 | 842 | jne L_wrap_around_window_mmx | |
843 | L_contiguous_in_window_mmx: | 843 | |
844 | 844 | sub eax,ecx | |
845 | add esi,eax | 845 | add esi,eax |
846 | sub esi,ecx | 846 | |
847 | 847 | cmp edx,ecx | |
848 | 848 | jbe L_do_copy1_mmx | |
849 | cmp edx,ecx | 849 | |
850 | jbe L_do_copy1_mmx | 850 | sub edx,ecx |
851 | 851 | rep movsb | |
852 | sub edx,ecx | 852 | mov esi,edi |
853 | rep movsb | 853 | sub esi,ebx |
854 | mov esi,edi | 854 | jmp L_do_copy1_mmx |
855 | sub esi,ebx | 855 | |
856 | 856 | cmp edx,ecx | |
857 | L_do_copy1_mmx: | 857 | jbe L_do_copy1_mmx |
858 | 858 | ||
859 | 859 | sub edx,ecx | |
860 | mov ecx,edx | 860 | rep movsb |
861 | rep movsb | 861 | mov esi,edi |
862 | 862 | sub esi,ebx | |
863 | mov esi, [esp+44] | 863 | jmp L_do_copy1_mmx |
864 | mov ebx, [esp+8] | 864 | |
865 | jmp L_while_test_mmx | 865 | L_wrap_around_window_mmx: |
866 | ; 1174 "inffast.S" | 866 | |
867 | L_invalid_distance_code: | 867 | mov eax, [esp+48] |
868 | 868 | cmp ecx,eax | |
869 | 869 | jbe L_contiguous_in_window_mmx | |
870 | 870 | ||
871 | 871 | add esi, [esp+52] | |
872 | 872 | add esi,eax | |
873 | mov ecx, invalid_distance_code_msg | 873 | sub esi,ecx |
874 | mov edx,26 | 874 | sub ecx,eax |
875 | jmp L_update_stream_state | 875 | |
876 | 876 | ||
877 | L_test_for_end_of_block: | 877 | cmp edx,ecx |
878 | 878 | jbe L_do_copy1_mmx | |
879 | 879 | ||
880 | 880 | sub edx,ecx | |
881 | 881 | rep movsb | |
882 | 882 | mov esi, [esp+56] | |
883 | test al,32 | 883 | mov ecx, [esp+48] |
884 | jz L_invalid_literal_length_code | 884 | cmp edx,ecx |
885 | 885 | jbe L_do_copy1_mmx | |
886 | mov ecx,0 | 886 | |
887 | mov edx,11 | 887 | sub edx,ecx |
888 | jmp L_update_stream_state | 888 | rep movsb |
889 | 889 | mov esi,edi | |
890 | L_invalid_literal_length_code: | 890 | sub esi,ebx |
891 | 891 | jmp L_do_copy1_mmx | |
892 | 892 | ||
893 | 893 | L_contiguous_in_window_mmx: | |
894 | 894 | ||
895 | 895 | add esi,eax | |
896 | mov ecx, invalid_literal_length_code_msg | 896 | sub esi,ecx |
897 | mov edx,26 | 897 | |
898 | jmp L_update_stream_state | 898 | |
899 | 899 | cmp edx,ecx | |
900 | L_invalid_distance_too_far: | 900 | jbe L_do_copy1_mmx |
901 | 901 | ||
902 | 902 | sub edx,ecx | |
903 | 903 | rep movsb | |
904 | mov esi, [esp+44] | 904 | mov esi,edi |
905 | mov ecx, invalid_distance_too_far_msg | 905 | sub esi,ebx |
906 | mov edx,26 | 906 | |
907 | jmp L_update_stream_state | 907 | L_do_copy1_mmx: |
908 | 908 | ||
909 | L_update_stream_state: | 909 | |
910 | 910 | mov ecx,edx | |
911 | mov eax, [esp+88] | 911 | rep movsb |
912 | test ecx,ecx | 912 | |
913 | jz L_skip_msg | 913 | mov esi, [esp+44] |
914 | mov [eax+24],ecx | 914 | mov ebx, [esp+8] |
915 | L_skip_msg: | 915 | jmp L_while_test_mmx |
916 | mov eax, [eax+28] | 916 | ; 1174 "inffast.S" |
917 | mov [eax+mode_state],edx | 917 | L_invalid_distance_code: |
918 | jmp L_break_loop | 918 | |
919 | 919 | ||
920 | ALIGN 4 | 920 | |
921 | L_break_loop: | 921 | |
922 | ; 1243 "inffast.S" | 922 | |
923 | cmp dword ptr [inflate_fast_use_mmx],2 | 923 | mov ecx, invalid_distance_code_msg |
924 | jne L_update_next_in | 924 | mov edx,INFLATE_MODE_BAD |
925 | 925 | jmp L_update_stream_state | |
926 | 926 | ||
927 | 927 | L_test_for_end_of_block: | |
928 | mov ebx,ebp | 928 | |
929 | 929 | ||
930 | L_update_next_in: | 930 | |
931 | ; 1266 "inffast.S" | 931 | |
932 | mov eax, [esp+88] | 932 | |
933 | mov ecx,ebx | 933 | test al,32 |
934 | mov edx, [eax+28] | 934 | jz L_invalid_literal_length_code |
935 | shr ecx,3 | 935 | |
936 | sub esi,ecx | 936 | mov ecx,0 |
937 | shl ecx,3 | 937 | mov edx,INFLATE_MODE_TYPE |
938 | sub ebx,ecx | 938 | jmp L_update_stream_state |
939 | mov [eax+12],edi | 939 | |
940 | mov [edx+bits_state],ebx | 940 | L_invalid_literal_length_code: |
941 | mov ecx,ebx | 941 | |
942 | 942 | ||
943 | lea ebx, [esp+28] | 943 | |
944 | cmp [esp+20],ebx | 944 | |
945 | jne L_buf_not_used | 945 | |
946 | 946 | mov ecx, invalid_literal_length_code_msg | |
947 | sub esi,ebx | 947 | mov edx,INFLATE_MODE_BAD |
948 | mov ebx, [eax+0] | 948 | jmp L_update_stream_state |
949 | mov [esp+20],ebx | 949 | |
950 | add esi,ebx | 950 | L_invalid_distance_too_far: |
951 | mov ebx, [eax+4] | 951 | |
952 | sub ebx,11 | 952 | |
953 | add [esp+20],ebx | 953 | |
954 | 954 | mov esi, [esp+44] | |
955 | L_buf_not_used: | 955 | mov ecx, invalid_distance_too_far_msg |
956 | mov [eax+0],esi | 956 | mov edx,INFLATE_MODE_BAD |
957 | 957 | jmp L_update_stream_state | |
958 | mov ebx,1 | 958 | |
959 | shl ebx,cl | 959 | L_update_stream_state: |
960 | dec ebx | 960 | |
961 | 961 | mov eax, [esp+88] | |
962 | 962 | test ecx,ecx | |
963 | 963 | jz L_skip_msg | |
964 | 964 | mov [eax+24],ecx | |
965 | 965 | L_skip_msg: | |
966 | cmp dword ptr [inflate_fast_use_mmx],2 | 966 | mov eax, [eax+28] |
967 | jne L_update_hold | 967 | mov [eax+mode_state],edx |
968 | 968 | jmp L_break_loop | |
969 | 969 | ||
970 | 970 | ALIGN 4 | |
971 | psrlq mm0,mm1 | 971 | L_break_loop: |
972 | movd ebp,mm0 | 972 | ; 1243 "inffast.S" |
973 | 973 | cmp dword ptr [inflate_fast_use_mmx],2 | |
974 | emms | 974 | jne L_update_next_in |
975 | 975 | ||
976 | L_update_hold: | 976 | |
977 | 977 | ||
978 | 978 | mov ebx,ebp | |
979 | 979 | ||
980 | and ebp,ebx | 980 | L_update_next_in: |
981 | mov [edx+hold_state],ebp | 981 | ; 1266 "inffast.S" |
982 | 982 | mov eax, [esp+88] | |
983 | 983 | mov ecx,ebx | |
984 | 984 | mov edx, [eax+28] | |
985 | 985 | shr ecx,3 | |
986 | mov ebx, [esp+20] | 986 | sub esi,ecx |
987 | cmp ebx,esi | 987 | shl ecx,3 |
988 | jbe L_last_is_smaller | 988 | sub ebx,ecx |
989 | 989 | mov [eax+12],edi | |
990 | sub ebx,esi | 990 | mov [edx+bits_state],ebx |
991 | add ebx,11 | 991 | mov ecx,ebx |
992 | mov [eax+4],ebx | 992 | |
993 | jmp L_fixup_out | 993 | lea ebx, [esp+28] |
994 | L_last_is_smaller: | 994 | cmp [esp+20],ebx |
995 | sub esi,ebx | 995 | jne L_buf_not_used |
996 | neg esi | 996 | |
997 | add esi,11 | 997 | sub esi,ebx |
998 | mov [eax+4],esi | 998 | mov ebx, [eax+0] |
999 | 999 | mov [esp+20],ebx | |
1000 | 1000 | add esi,ebx | |
1001 | 1001 | mov ebx, [eax+4] | |
1002 | 1002 | sub ebx,11 | |
1003 | L_fixup_out: | 1003 | add [esp+20],ebx |
1004 | 1004 | ||
1005 | mov ebx, [esp+16] | 1005 | L_buf_not_used: |
1006 | cmp ebx,edi | 1006 | mov [eax+0],esi |
1007 | jbe L_end_is_smaller | 1007 | |
1008 | 1008 | mov ebx,1 | |
1009 | sub ebx,edi | 1009 | shl ebx,cl |
1010 | add ebx,257 | 1010 | dec ebx |
1011 | mov [eax+16],ebx | 1011 | |
1012 | jmp L_done | 1012 | |
1013 | L_end_is_smaller: | 1013 | |
1014 | sub edi,ebx | 1014 | |
1015 | neg edi | 1015 | |
1016 | add edi,257 | 1016 | cmp dword ptr [inflate_fast_use_mmx],2 |
1017 | mov [eax+16],edi | 1017 | jne L_update_hold |
1018 | 1018 | ||
1019 | 1019 | ||
1020 | 1020 | ||
1021 | 1021 | psrlq mm0,mm1 | |
1022 | 1022 | movd ebp,mm0 | |
1023 | L_done: | 1023 | |
1024 | add esp,64 | 1024 | emms |
1025 | popfd | 1025 | |
1026 | pop ebx | 1026 | L_update_hold: |
1027 | pop ebp | 1027 | |
1028 | pop esi | 1028 | |
1029 | pop edi | 1029 | |
1030 | ret | 1030 | and ebp,ebx |
1031 | 1031 | mov [edx+hold_state],ebp | |
1032 | 1032 | ||
1033 | 1033 | ||
1034 | 1034 | ||
1035 | _TEXT ends | 1035 | |
1036 | end | 1036 | mov ebx, [esp+20] |
1037 | cmp ebx,esi | ||
1038 | jbe L_last_is_smaller | ||
1039 | |||
1040 | sub ebx,esi | ||
1041 | add ebx,11 | ||
1042 | mov [eax+4],ebx | ||
1043 | jmp L_fixup_out | ||
1044 | L_last_is_smaller: | ||
1045 | sub esi,ebx | ||
1046 | neg esi | ||
1047 | add esi,11 | ||
1048 | mov [eax+4],esi | ||
1049 | |||
1050 | |||
1051 | |||
1052 | |||
1053 | L_fixup_out: | ||
1054 | |||
1055 | mov ebx, [esp+16] | ||
1056 | cmp ebx,edi | ||
1057 | jbe L_end_is_smaller | ||
1058 | |||
1059 | sub ebx,edi | ||
1060 | add ebx,257 | ||
1061 | mov [eax+16],ebx | ||
1062 | jmp L_done | ||
1063 | L_end_is_smaller: | ||
1064 | sub edi,ebx | ||
1065 | neg edi | ||
1066 | add edi,257 | ||
1067 | mov [eax+16],edi | ||
1068 | |||
1069 | |||
1070 | |||
1071 | |||
1072 | |||
1073 | L_done: | ||
1074 | add esp,64 | ||
1075 | popfd | ||
1076 | pop ebx | ||
1077 | pop ebp | ||
1078 | pop esi | ||
1079 | pop edi | ||
1080 | ret | ||
1081 | |||
1082 | _TEXT ends | ||
1083 | end | ||