diff options
author | Mark Adler <madler@alumni.caltech.edu> | 2011-09-09 23:22:37 -0700 |
---|---|---|
committer | Mark Adler <madler@alumni.caltech.edu> | 2011-09-09 23:22:37 -0700 |
commit | 4b5a43a219d51066c01ff2ab86af18b967f2d0dd (patch) | |
tree | 4dcaf0cd18751d04cf638a9a6ec521990d4f2e90 /contrib/masm686 | |
parent | 086e982175da84b3db958191031380794315f95f (diff) | |
download | zlib-1.2.0.5.tar.gz zlib-1.2.0.5.tar.bz2 zlib-1.2.0.5.zip |
zlib 1.2.0.5v1.2.0.5
Diffstat (limited to 'contrib/masm686')
-rw-r--r-- | contrib/masm686/match.asm | 408 |
1 files changed, 408 insertions, 0 deletions
diff --git a/contrib/masm686/match.asm b/contrib/masm686/match.asm new file mode 100644 index 0000000..2287804 --- /dev/null +++ b/contrib/masm686/match.asm | |||
@@ -0,0 +1,408 @@ | |||
1 | |||
2 | ; match.asm -- Pentium-Pro optimized version of longest_match() | ||
3 | ; | ||
4 | ; Updated for zlib 1.1.3 and converted to MASM 6.1x | ||
5 | ; Copyright (C) 2000 Dan Higdon <hdan@kinesoft.com> | ||
6 | ; and Chuck Walbourn <chuckw@kinesoft.com> | ||
7 | ; Corrections by Cosmin Truta <cosmint@cs.ubbcluj.ro> | ||
8 | ; | ||
9 | ; This is free software; you can redistribute it and/or modify it | ||
10 | ; under the terms of the GNU General Public License. | ||
11 | |||
12 | ; Based on match.S | ||
13 | ; Written for zlib 1.1.2 | ||
14 | ; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com> | ||
15 | |||
16 | .686P | ||
17 | .MODEL FLAT | ||
18 | |||
19 | ;=========================================================================== | ||
20 | ; EQUATES | ||
21 | ;=========================================================================== | ||
22 | |||
23 | MAX_MATCH EQU 258 | ||
24 | MIN_MATCH EQU 3 | ||
25 | MIN_LOOKAHEAD EQU (MAX_MATCH + MIN_MATCH + 1) | ||
26 | MAX_MATCH_8 EQU ((MAX_MATCH + 7) AND (NOT 7)) | ||
27 | |||
28 | ;=========================================================================== | ||
29 | ; STRUCTURES | ||
30 | ;=========================================================================== | ||
31 | |||
32 | ; This STRUCT assumes a 4-byte alignment | ||
33 | |||
34 | DEFLATE_STATE STRUCT | ||
35 | ds_strm dd ? | ||
36 | ds_status dd ? | ||
37 | ds_pending_buf dd ? | ||
38 | ds_pending_buf_size dd ? | ||
39 | ds_pending_out dd ? | ||
40 | ds_pending dd ? | ||
41 | ds_wrap dd ? | ||
42 | ds_data_type db ? | ||
43 | ds_method db ? | ||
44 | db ? ; padding | ||
45 | db ? ; padding | ||
46 | ds_last_flush dd ? | ||
47 | ds_w_size dd ? ; used | ||
48 | ds_w_bits dd ? | ||
49 | ds_w_mask dd ? ; used | ||
50 | ds_window dd ? ; used | ||
51 | ds_window_size dd ? | ||
52 | ds_prev dd ? ; used | ||
53 | ds_head dd ? | ||
54 | ds_ins_h dd ? | ||
55 | ds_hash_size dd ? | ||
56 | ds_hash_bits dd ? | ||
57 | ds_hash_mask dd ? | ||
58 | ds_hash_shift dd ? | ||
59 | ds_block_start dd ? | ||
60 | ds_match_length dd ? ; used | ||
61 | ds_prev_match dd ? ; used | ||
62 | ds_match_available dd ? | ||
63 | ds_strstart dd ? ; used | ||
64 | ds_match_start dd ? ; used | ||
65 | ds_lookahead dd ? ; used | ||
66 | ds_prev_length dd ? ; used | ||
67 | ds_max_chain_length dd ? ; used | ||
68 | ds_max_laxy_match dd ? | ||
69 | ds_level dd ? | ||
70 | ds_strategy dd ? | ||
71 | ds_good_match dd ? ; used | ||
72 | ds_nice_match dd ? ; used | ||
73 | |||
74 | ; Don't need anymore of the struct for match | ||
75 | DEFLATE_STATE ENDS | ||
76 | |||
77 | ;=========================================================================== | ||
78 | ; CODE | ||
79 | ;=========================================================================== | ||
80 | _TEXT SEGMENT | ||
81 | |||
82 | ;--------------------------------------------------------------------------- | ||
83 | ; match_init | ||
84 | ;--------------------------------------------------------------------------- | ||
85 | ALIGN 4 | ||
86 | PUBLIC _match_init | ||
87 | _match_init PROC | ||
88 | ; no initialization needed | ||
89 | ret | ||
90 | _match_init ENDP | ||
91 | |||
92 | ;--------------------------------------------------------------------------- | ||
93 | ; uInt longest_match(deflate_state *deflatestate, IPos curmatch) | ||
94 | ;--------------------------------------------------------------------------- | ||
95 | ALIGN 4 | ||
96 | |||
97 | PUBLIC _longest_match | ||
98 | _longest_match PROC | ||
99 | |||
100 | ; Since this code uses EBP for a scratch register, the stack frame must | ||
101 | ; be manually constructed and referenced relative to the ESP register. | ||
102 | |||
103 | ; Stack image | ||
104 | ; Variables | ||
105 | chainlenwmask = 0 ; high word: current chain len | ||
106 | ; low word: s->wmask | ||
107 | window = 4 ; local copy of s->window | ||
108 | windowbestlen = 8 ; s->window + bestlen | ||
109 | scanend = 12 ; last two bytes of string | ||
110 | scanstart = 16 ; first two bytes of string | ||
111 | scanalign = 20 ; dword-misalignment of string | ||
112 | nicematch = 24 ; a good enough match size | ||
113 | bestlen = 28 ; size of best match so far | ||
114 | scan = 32 ; ptr to string wanting match | ||
115 | varsize = 36 ; number of bytes (also offset to last saved register) | ||
116 | |||
117 | ; Saved Registers (actually pushed into place) | ||
118 | ebx_save = 36 | ||
119 | edi_save = 40 | ||
120 | esi_save = 44 | ||
121 | ebp_save = 48 | ||
122 | |||
123 | ; Parameters | ||
124 | retaddr = 52 | ||
125 | deflatestate = 56 | ||
126 | curmatch = 60 | ||
127 | |||
128 | ; Save registers that the compiler may be using | ||
129 | push ebp | ||
130 | push edi | ||
131 | push esi | ||
132 | push ebx | ||
133 | |||
134 | ; Allocate local variable space | ||
135 | sub esp,varsize | ||
136 | |||
137 | ; Retrieve the function arguments. ecx will hold cur_match | ||
138 | ; throughout the entire function. edx will hold the pointer to the | ||
139 | ; deflate_state structure during the function's setup (before | ||
140 | ; entering the main loop). | ||
141 | |||
142 | mov edx, [esp+deflatestate] | ||
143 | ASSUME edx:PTR DEFLATE_STATE | ||
144 | |||
145 | mov ecx, [esp+curmatch] | ||
146 | |||
147 | ; uInt wmask = s->w_mask; | ||
148 | ; unsigned chain_length = s->max_chain_length; | ||
149 | ; if (s->prev_length >= s->good_match) { | ||
150 | ; chain_length >>= 2; | ||
151 | ; } | ||
152 | |||
153 | mov eax, [edx].ds_prev_length | ||
154 | mov ebx, [edx].ds_good_match | ||
155 | cmp eax, ebx | ||
156 | mov eax, [edx].ds_w_mask | ||
157 | mov ebx, [edx].ds_max_chain_length | ||
158 | jl SHORT LastMatchGood | ||
159 | shr ebx, 2 | ||
160 | LastMatchGood: | ||
161 | |||
162 | ; chainlen is decremented once beforehand so that the function can | ||
163 | ; use the sign flag instead of the zero flag for the exit test. | ||
164 | ; It is then shifted into the high word, to make room for the wmask | ||
165 | ; value, which it will always accompany. | ||
166 | |||
167 | dec ebx | ||
168 | shl ebx, 16 | ||
169 | or ebx, eax | ||
170 | mov [esp+chainlenwmask], ebx | ||
171 | |||
172 | ; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; | ||
173 | |||
174 | mov eax, [edx].ds_nice_match | ||
175 | mov ebx, [edx].ds_lookahead | ||
176 | cmp ebx, eax | ||
177 | jl SHORT LookaheadLess | ||
178 | mov ebx, eax | ||
179 | LookaheadLess: | ||
180 | mov [esp+nicematch], ebx | ||
181 | |||
182 | ;/* register Bytef *scan = s->window + s->strstart; */ | ||
183 | |||
184 | mov esi, [edx].ds_window | ||
185 | mov [esp+window], esi | ||
186 | mov ebp, [edx].ds_strstart | ||
187 | lea edi, [esi+ebp] | ||
188 | mov [esp+scan],edi | ||
189 | |||
190 | ;/* Determine how many bytes the scan ptr is off from being */ | ||
191 | ;/* dword-aligned. */ | ||
192 | |||
193 | mov eax, edi | ||
194 | neg eax | ||
195 | and eax, 3 | ||
196 | mov [esp+scanalign], eax | ||
197 | |||
198 | ;/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ | ||
199 | ;/* s->strstart - (IPos)MAX_DIST(s) : NIL; */ | ||
200 | |||
201 | mov eax, [edx].ds_w_size | ||
202 | sub eax, MIN_LOOKAHEAD | ||
203 | sub ebp, eax | ||
204 | jg SHORT LimitPositive | ||
205 | xor ebp, ebp | ||
206 | LimitPositive: | ||
207 | |||
208 | ;/* int best_len = s->prev_length; */ | ||
209 | |||
210 | mov eax, [edx].ds_prev_length | ||
211 | mov [esp+bestlen], eax | ||
212 | |||
213 | ;/* Store the sum of s->window + best_len in %esi locally, and in %esi. */ | ||
214 | |||
215 | add esi, eax | ||
216 | mov [esp+windowbestlen], esi | ||
217 | |||
218 | ;/* register ush scan_start = *(ushf*)scan; */ | ||
219 | ;/* register ush scan_end = *(ushf*)(scan+best_len-1); */ | ||
220 | ;/* Posf *prev = s->prev; */ | ||
221 | |||
222 | movzx ebx, WORD PTR[edi] | ||
223 | mov [esp+scanstart], ebx | ||
224 | movzx ebx, WORD PTR[eax+edi-1] | ||
225 | mov [esp+scanend], ebx | ||
226 | mov edi, [edx].ds_prev | ||
227 | |||
228 | ;/* Jump into the main loop. */ | ||
229 | |||
230 | mov edx, [esp+chainlenwmask] | ||
231 | jmp SHORT LoopEntry | ||
232 | |||
233 | ;/* do { | ||
234 | ; * match = s->window + cur_match; | ||
235 | ; * if (*(ushf*)(match+best_len-1) != scan_end || | ||
236 | ; * *(ushf*)match != scan_start) continue; | ||
237 | ; * [...] | ||
238 | ; * } while ((cur_match = prev[cur_match & wmask]) > limit | ||
239 | ; * && --chain_length != 0); | ||
240 | ; * | ||
241 | ; * Here is the inner loop of the function. The function will spend the | ||
242 | ; * majority of its time in this loop, and majority of that time will | ||
243 | ; * be spent in the first ten instructions. | ||
244 | ; * | ||
245 | ; * Within this loop: | ||
246 | ; * %ebx = scanend | ||
247 | ; * %ecx = curmatch | ||
248 | ; * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) | ||
249 | ; * %esi = windowbestlen - i.e., (window + bestlen) | ||
250 | ; * %edi = prev | ||
251 | ; * %ebp = limit | ||
252 | ; */ | ||
253 | |||
254 | ALIGN 4 | ||
255 | LookupLoop: | ||
256 | and ecx, edx | ||
257 | movzx ecx, WORD PTR[edi+ecx*2] | ||
258 | cmp ecx, ebp | ||
259 | jbe LeaveNow | ||
260 | sub edx, 000010000H | ||
261 | js LeaveNow | ||
262 | |||
263 | LoopEntry: | ||
264 | movzx eax, WORD PTR[esi+ecx-1] | ||
265 | cmp eax, ebx | ||
266 | jnz SHORT LookupLoop | ||
267 | |||
268 | mov eax, [esp+window] | ||
269 | movzx eax, WORD PTR[eax+ecx] | ||
270 | cmp eax, [esp+scanstart] | ||
271 | jnz SHORT LookupLoop | ||
272 | |||
273 | ;/* Store the current value of chainlen. */ | ||
274 | |||
275 | mov [esp+chainlenwmask], edx | ||
276 | |||
277 | ;/* Point %edi to the string under scrutiny, and %esi to the string we */ | ||
278 | ;/* are hoping to match it up with. In actuality, %esi and %edi are */ | ||
279 | ;/* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ | ||
280 | ;/* initialized to -(MAX_MATCH_8 - scanalign). */ | ||
281 | |||
282 | mov esi, [esp+window] | ||
283 | mov edi, [esp+scan] | ||
284 | add esi, ecx | ||
285 | mov eax, [esp+scanalign] | ||
286 | mov edx, -MAX_MATCH_8 | ||
287 | lea edi, [edi+eax+MAX_MATCH_8] | ||
288 | lea esi, [esi+eax+MAX_MATCH_8] | ||
289 | |||
290 | ;/* Test the strings for equality, 8 bytes at a time. At the end, | ||
291 | ; * adjust %edx so that it is offset to the exact byte that mismatched. | ||
292 | ; * | ||
293 | ; * We already know at this point that the first three bytes of the | ||
294 | ; * strings match each other, and they can be safely passed over before | ||
295 | ; * starting the compare loop. So what this code does is skip over 0-3 | ||
296 | ; * bytes, as much as necessary in order to dword-align the %edi | ||
297 | ; * pointer. (%esi will still be misaligned three times out of four.) | ||
298 | ; * | ||
299 | ; * It should be confessed that this loop usually does not represent | ||
300 | ; * much of the total running time. Replacing it with a more | ||
301 | ; * straightforward "rep cmpsb" would not drastically degrade | ||
302 | ; * performance. | ||
303 | ; */ | ||
304 | |||
305 | LoopCmps: | ||
306 | mov eax, DWORD PTR[esi+edx] | ||
307 | xor eax, DWORD PTR[edi+edx] | ||
308 | jnz SHORT LeaveLoopCmps | ||
309 | |||
310 | mov eax, DWORD PTR[esi+edx+4] | ||
311 | xor eax, DWORD PTR[edi+edx+4] | ||
312 | jnz SHORT LeaveLoopCmps4 | ||
313 | |||
314 | add edx, 8 | ||
315 | jnz SHORT LoopCmps | ||
316 | jmp LenMaximum | ||
317 | ALIGN 4 | ||
318 | |||
319 | LeaveLoopCmps4: | ||
320 | add edx, 4 | ||
321 | |||
322 | LeaveLoopCmps: | ||
323 | test eax, 00000FFFFH | ||
324 | jnz SHORT LenLower | ||
325 | |||
326 | add edx, 2 | ||
327 | shr eax, 16 | ||
328 | |||
329 | LenLower: | ||
330 | sub al, 1 | ||
331 | adc edx, 0 | ||
332 | |||
333 | ;/* Calculate the length of the match. If it is longer than MAX_MATCH, */ | ||
334 | ;/* then automatically accept it as the best possible match and leave. */ | ||
335 | |||
336 | lea eax, [edi+edx] | ||
337 | mov edi, [esp+scan] | ||
338 | sub eax, edi | ||
339 | cmp eax, MAX_MATCH | ||
340 | jge SHORT LenMaximum | ||
341 | |||
342 | ;/* If the length of the match is not longer than the best match we */ | ||
343 | ;/* have so far, then forget it and return to the lookup loop. */ | ||
344 | |||
345 | mov edx, [esp+deflatestate] | ||
346 | mov ebx, [esp+bestlen] | ||
347 | cmp eax, ebx | ||
348 | jg SHORT LongerMatch | ||
349 | mov esi, [esp+windowbestlen] | ||
350 | mov edi, [edx].ds_prev | ||
351 | mov ebx, [esp+scanend] | ||
352 | mov edx, [esp+chainlenwmask] | ||
353 | jmp LookupLoop | ||
354 | ALIGN 4 | ||
355 | |||
356 | ;/* s->match_start = cur_match; */ | ||
357 | ;/* best_len = len; */ | ||
358 | ;/* if (len >= nice_match) break; */ | ||
359 | ;/* scan_end = *(ushf*)(scan+best_len-1); */ | ||
360 | |||
361 | LongerMatch: | ||
362 | mov ebx, [esp+nicematch] | ||
363 | mov [esp+bestlen], eax | ||
364 | mov [edx].ds_match_start, ecx | ||
365 | cmp eax, ebx | ||
366 | jge SHORT LeaveNow | ||
367 | mov esi, [esp+window] | ||
368 | add esi, eax | ||
369 | mov [esp+windowbestlen], esi | ||
370 | movzx ebx, WORD PTR[edi+eax-1] | ||
371 | mov edi, [edx].ds_prev | ||
372 | mov [esp+scanend], ebx | ||
373 | mov edx, [esp+chainlenwmask] | ||
374 | jmp LookupLoop | ||
375 | ALIGN 4 | ||
376 | |||
377 | ;/* Accept the current string, with the maximum possible length. */ | ||
378 | |||
379 | LenMaximum: | ||
380 | mov edx, [esp+deflatestate] | ||
381 | mov DWORD PTR[esp+bestlen], MAX_MATCH | ||
382 | mov [edx].ds_match_start, ecx | ||
383 | |||
384 | ;/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ | ||
385 | ;/* return s->lookahead; */ | ||
386 | |||
387 | LeaveNow: | ||
388 | mov edx, [esp+deflatestate] | ||
389 | mov ebx, [esp+bestlen] | ||
390 | mov eax, [edx].ds_lookahead | ||
391 | cmp ebx, eax | ||
392 | jg SHORT LookaheadRet | ||
393 | mov eax, ebx | ||
394 | LookaheadRet: | ||
395 | |||
396 | ; Restore the stack and return from whence we came. | ||
397 | |||
398 | add esp, varsize | ||
399 | pop ebx | ||
400 | pop esi | ||
401 | pop edi | ||
402 | pop ebp | ||
403 | ret | ||
404 | |||
405 | _longest_match ENDP | ||
406 | |||
407 | _TEXT ENDS | ||
408 | END | ||