diff options
author | Mark Adler <zlib@madler.net> | 2017-10-12 20:08:53 -0700 |
---|---|---|
committer | Mark Adler <zlib@madler.net> | 2017-10-12 20:27:14 -0700 |
commit | 288f1080317b954b6bdca33708631c011549c008 (patch) | |
tree | 9629f01104722ba8e490f04a0790c56513ba989a /contrib/asm686/match.S | |
parent | a5773513942b1c57d0eff51fcb2ebac72796ed95 (diff) | |
download | zlib-288f1080317b954b6bdca33708631c011549c008.tar.gz zlib-288f1080317b954b6bdca33708631c011549c008.tar.bz2 zlib-288f1080317b954b6bdca33708631c011549c008.zip |
Remove old assembler code in which bugs have manifested.
In addition, there is not sufficient gain from the inflate
assembler code to warrant its inclusion.
Diffstat (limited to 'contrib/asm686/match.S')
-rw-r--r-- | contrib/asm686/match.S | 357 |
1 files changed, 0 insertions, 357 deletions
diff --git a/contrib/asm686/match.S b/contrib/asm686/match.S deleted file mode 100644 index fa42109..0000000 --- a/contrib/asm686/match.S +++ /dev/null | |||
@@ -1,357 +0,0 @@ | |||
1 | /* match.S -- x86 assembly version of the zlib longest_match() function. | ||
2 | * Optimized for the Intel 686 chips (PPro and later). | ||
3 | * | ||
4 | * Copyright (C) 1998, 2007 Brian Raiter <breadbox@muppetlabs.com> | ||
5 | * | ||
6 | * This software is provided 'as-is', without any express or implied | ||
7 | * warranty. In no event will the author be held liable for any damages | ||
8 | * arising from the use of this software. | ||
9 | * | ||
10 | * Permission is granted to anyone to use this software for any purpose, | ||
11 | * including commercial applications, and to alter it and redistribute it | ||
12 | * freely, subject to the following restrictions: | ||
13 | * | ||
14 | * 1. The origin of this software must not be misrepresented; you must not | ||
15 | * claim that you wrote the original software. If you use this software | ||
16 | * in a product, an acknowledgment in the product documentation would be | ||
17 | * appreciated but is not required. | ||
18 | * 2. Altered source versions must be plainly marked as such, and must not be | ||
19 | * misrepresented as being the original software. | ||
20 | * 3. This notice may not be removed or altered from any source distribution. | ||
21 | */ | ||
22 | |||
23 | #ifndef NO_UNDERLINE | ||
24 | #define match_init _match_init | ||
25 | #define longest_match _longest_match | ||
26 | #endif | ||
27 | |||
28 | #define MAX_MATCH (258) | ||
29 | #define MIN_MATCH (3) | ||
30 | #define MIN_LOOKAHEAD (MAX_MATCH + MIN_MATCH + 1) | ||
31 | #define MAX_MATCH_8 ((MAX_MATCH + 7) & ~7) | ||
32 | |||
33 | /* stack frame offsets */ | ||
34 | |||
35 | #define chainlenwmask 0 /* high word: current chain len */ | ||
36 | /* low word: s->wmask */ | ||
37 | #define window 4 /* local copy of s->window */ | ||
38 | #define windowbestlen 8 /* s->window + bestlen */ | ||
39 | #define scanstart 16 /* first two bytes of string */ | ||
40 | #define scanend 12 /* last two bytes of string */ | ||
41 | #define scanalign 20 /* dword-misalignment of string */ | ||
42 | #define nicematch 24 /* a good enough match size */ | ||
43 | #define bestlen 28 /* size of best match so far */ | ||
44 | #define scan 32 /* ptr to string wanting match */ | ||
45 | |||
46 | #define LocalVarsSize (36) | ||
47 | /* saved ebx 36 */ | ||
48 | /* saved edi 40 */ | ||
49 | /* saved esi 44 */ | ||
50 | /* saved ebp 48 */ | ||
51 | /* return address 52 */ | ||
52 | #define deflatestate 56 /* the function arguments */ | ||
53 | #define curmatch 60 | ||
54 | |||
55 | /* All the +zlib1222add offsets are due to the addition of fields | ||
56 | * in zlib in the deflate_state structure since the asm code was first written | ||
57 | * (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). | ||
58 | * (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). | ||
59 | * if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8"). | ||
60 | */ | ||
61 | |||
62 | #define zlib1222add (8) | ||
63 | |||
64 | #define dsWSize (36+zlib1222add) | ||
65 | #define dsWMask (44+zlib1222add) | ||
66 | #define dsWindow (48+zlib1222add) | ||
67 | #define dsPrev (56+zlib1222add) | ||
68 | #define dsMatchLen (88+zlib1222add) | ||
69 | #define dsPrevMatch (92+zlib1222add) | ||
70 | #define dsStrStart (100+zlib1222add) | ||
71 | #define dsMatchStart (104+zlib1222add) | ||
72 | #define dsLookahead (108+zlib1222add) | ||
73 | #define dsPrevLen (112+zlib1222add) | ||
74 | #define dsMaxChainLen (116+zlib1222add) | ||
75 | #define dsGoodMatch (132+zlib1222add) | ||
76 | #define dsNiceMatch (136+zlib1222add) | ||
77 | |||
78 | |||
79 | .file "match.S" | ||
80 | |||
81 | .globl match_init, longest_match | ||
82 | |||
83 | .text | ||
84 | |||
85 | /* uInt longest_match(deflate_state *deflatestate, IPos curmatch) */ | ||
86 | .cfi_sections .debug_frame | ||
87 | |||
88 | longest_match: | ||
89 | |||
90 | .cfi_startproc | ||
91 | /* Save registers that the compiler may be using, and adjust %esp to */ | ||
92 | /* make room for our stack frame. */ | ||
93 | |||
94 | pushl %ebp | ||
95 | .cfi_def_cfa_offset 8 | ||
96 | .cfi_offset ebp, -8 | ||
97 | pushl %edi | ||
98 | .cfi_def_cfa_offset 12 | ||
99 | pushl %esi | ||
100 | .cfi_def_cfa_offset 16 | ||
101 | pushl %ebx | ||
102 | .cfi_def_cfa_offset 20 | ||
103 | subl $LocalVarsSize, %esp | ||
104 | .cfi_def_cfa_offset LocalVarsSize+20 | ||
105 | |||
106 | /* Retrieve the function arguments. %ecx will hold cur_match */ | ||
107 | /* throughout the entire function. %edx will hold the pointer to the */ | ||
108 | /* deflate_state structure during the function's setup (before */ | ||
109 | /* entering the main loop). */ | ||
110 | |||
111 | movl deflatestate(%esp), %edx | ||
112 | movl curmatch(%esp), %ecx | ||
113 | |||
114 | /* uInt wmask = s->w_mask; */ | ||
115 | /* unsigned chain_length = s->max_chain_length; */ | ||
116 | /* if (s->prev_length >= s->good_match) { */ | ||
117 | /* chain_length >>= 2; */ | ||
118 | /* } */ | ||
119 | |||
120 | movl dsPrevLen(%edx), %eax | ||
121 | movl dsGoodMatch(%edx), %ebx | ||
122 | cmpl %ebx, %eax | ||
123 | movl dsWMask(%edx), %eax | ||
124 | movl dsMaxChainLen(%edx), %ebx | ||
125 | jl LastMatchGood | ||
126 | shrl $2, %ebx | ||
127 | LastMatchGood: | ||
128 | |||
129 | /* chainlen is decremented once beforehand so that the function can */ | ||
130 | /* use the sign flag instead of the zero flag for the exit test. */ | ||
131 | /* It is then shifted into the high word, to make room for the wmask */ | ||
132 | /* value, which it will always accompany. */ | ||
133 | |||
134 | decl %ebx | ||
135 | shll $16, %ebx | ||
136 | orl %eax, %ebx | ||
137 | movl %ebx, chainlenwmask(%esp) | ||
138 | |||
139 | /* if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; */ | ||
140 | |||
141 | movl dsNiceMatch(%edx), %eax | ||
142 | movl dsLookahead(%edx), %ebx | ||
143 | cmpl %eax, %ebx | ||
144 | jl LookaheadLess | ||
145 | movl %eax, %ebx | ||
146 | LookaheadLess: movl %ebx, nicematch(%esp) | ||
147 | |||
148 | /* register Bytef *scan = s->window + s->strstart; */ | ||
149 | |||
150 | movl dsWindow(%edx), %esi | ||
151 | movl %esi, window(%esp) | ||
152 | movl dsStrStart(%edx), %ebp | ||
153 | lea (%esi,%ebp), %edi | ||
154 | movl %edi, scan(%esp) | ||
155 | |||
156 | /* Determine how many bytes the scan ptr is off from being */ | ||
157 | /* dword-aligned. */ | ||
158 | |||
159 | movl %edi, %eax | ||
160 | negl %eax | ||
161 | andl $3, %eax | ||
162 | movl %eax, scanalign(%esp) | ||
163 | |||
164 | /* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */ | ||
165 | /* s->strstart - (IPos)MAX_DIST(s) : NIL; */ | ||
166 | |||
167 | movl dsWSize(%edx), %eax | ||
168 | subl $MIN_LOOKAHEAD, %eax | ||
169 | subl %eax, %ebp | ||
170 | jg LimitPositive | ||
171 | xorl %ebp, %ebp | ||
172 | LimitPositive: | ||
173 | |||
174 | /* int best_len = s->prev_length; */ | ||
175 | |||
176 | movl dsPrevLen(%edx), %eax | ||
177 | movl %eax, bestlen(%esp) | ||
178 | |||
179 | /* Store the sum of s->window + best_len in %esi locally, and in %esi. */ | ||
180 | |||
181 | addl %eax, %esi | ||
182 | movl %esi, windowbestlen(%esp) | ||
183 | |||
184 | /* register ush scan_start = *(ushf*)scan; */ | ||
185 | /* register ush scan_end = *(ushf*)(scan+best_len-1); */ | ||
186 | /* Posf *prev = s->prev; */ | ||
187 | |||
188 | movzwl (%edi), %ebx | ||
189 | movl %ebx, scanstart(%esp) | ||
190 | movzwl -1(%edi,%eax), %ebx | ||
191 | movl %ebx, scanend(%esp) | ||
192 | movl dsPrev(%edx), %edi | ||
193 | |||
194 | /* Jump into the main loop. */ | ||
195 | |||
196 | movl chainlenwmask(%esp), %edx | ||
197 | jmp LoopEntry | ||
198 | |||
199 | .balign 16 | ||
200 | |||
201 | /* do { | ||
202 | * match = s->window + cur_match; | ||
203 | * if (*(ushf*)(match+best_len-1) != scan_end || | ||
204 | * *(ushf*)match != scan_start) continue; | ||
205 | * [...] | ||
206 | * } while ((cur_match = prev[cur_match & wmask]) > limit | ||
207 | * && --chain_length != 0); | ||
208 | * | ||
209 | * Here is the inner loop of the function. The function will spend the | ||
210 | * majority of its time in this loop, and majority of that time will | ||
211 | * be spent in the first ten instructions. | ||
212 | * | ||
213 | * Within this loop: | ||
214 | * %ebx = scanend | ||
215 | * %ecx = curmatch | ||
216 | * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) | ||
217 | * %esi = windowbestlen - i.e., (window + bestlen) | ||
218 | * %edi = prev | ||
219 | * %ebp = limit | ||
220 | */ | ||
221 | LookupLoop: | ||
222 | andl %edx, %ecx | ||
223 | movzwl (%edi,%ecx,2), %ecx | ||
224 | cmpl %ebp, %ecx | ||
225 | jbe LeaveNow | ||
226 | subl $0x00010000, %edx | ||
227 | js LeaveNow | ||
228 | LoopEntry: movzwl -1(%esi,%ecx), %eax | ||
229 | cmpl %ebx, %eax | ||
230 | jnz LookupLoop | ||
231 | movl window(%esp), %eax | ||
232 | movzwl (%eax,%ecx), %eax | ||
233 | cmpl scanstart(%esp), %eax | ||
234 | jnz LookupLoop | ||
235 | |||
236 | /* Store the current value of chainlen. */ | ||
237 | |||
238 | movl %edx, chainlenwmask(%esp) | ||
239 | |||
240 | /* Point %edi to the string under scrutiny, and %esi to the string we */ | ||
241 | /* are hoping to match it up with. In actuality, %esi and %edi are */ | ||
242 | /* both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and %edx is */ | ||
243 | /* initialized to -(MAX_MATCH_8 - scanalign). */ | ||
244 | |||
245 | movl window(%esp), %esi | ||
246 | movl scan(%esp), %edi | ||
247 | addl %ecx, %esi | ||
248 | movl scanalign(%esp), %eax | ||
249 | movl $(-MAX_MATCH_8), %edx | ||
250 | lea MAX_MATCH_8(%edi,%eax), %edi | ||
251 | lea MAX_MATCH_8(%esi,%eax), %esi | ||
252 | |||
253 | /* Test the strings for equality, 8 bytes at a time. At the end, | ||
254 | * adjust %edx so that it is offset to the exact byte that mismatched. | ||
255 | * | ||
256 | * We already know at this point that the first three bytes of the | ||
257 | * strings match each other, and they can be safely passed over before | ||
258 | * starting the compare loop. So what this code does is skip over 0-3 | ||
259 | * bytes, as much as necessary in order to dword-align the %edi | ||
260 | * pointer. (%esi will still be misaligned three times out of four.) | ||
261 | * | ||
262 | * It should be confessed that this loop usually does not represent | ||
263 | * much of the total running time. Replacing it with a more | ||
264 | * straightforward "rep cmpsb" would not drastically degrade | ||
265 | * performance. | ||
266 | */ | ||
267 | LoopCmps: | ||
268 | movl (%esi,%edx), %eax | ||
269 | xorl (%edi,%edx), %eax | ||
270 | jnz LeaveLoopCmps | ||
271 | movl 4(%esi,%edx), %eax | ||
272 | xorl 4(%edi,%edx), %eax | ||
273 | jnz LeaveLoopCmps4 | ||
274 | addl $8, %edx | ||
275 | jnz LoopCmps | ||
276 | jmp LenMaximum | ||
277 | LeaveLoopCmps4: addl $4, %edx | ||
278 | LeaveLoopCmps: testl $0x0000FFFF, %eax | ||
279 | jnz LenLower | ||
280 | addl $2, %edx | ||
281 | shrl $16, %eax | ||
282 | LenLower: subb $1, %al | ||
283 | adcl $0, %edx | ||
284 | |||
285 | /* Calculate the length of the match. If it is longer than MAX_MATCH, */ | ||
286 | /* then automatically accept it as the best possible match and leave. */ | ||
287 | |||
288 | lea (%edi,%edx), %eax | ||
289 | movl scan(%esp), %edi | ||
290 | subl %edi, %eax | ||
291 | cmpl $MAX_MATCH, %eax | ||
292 | jge LenMaximum | ||
293 | |||
294 | /* If the length of the match is not longer than the best match we */ | ||
295 | /* have so far, then forget it and return to the lookup loop. */ | ||
296 | |||
297 | movl deflatestate(%esp), %edx | ||
298 | movl bestlen(%esp), %ebx | ||
299 | cmpl %ebx, %eax | ||
300 | jg LongerMatch | ||
301 | movl windowbestlen(%esp), %esi | ||
302 | movl dsPrev(%edx), %edi | ||
303 | movl scanend(%esp), %ebx | ||
304 | movl chainlenwmask(%esp), %edx | ||
305 | jmp LookupLoop | ||
306 | |||
307 | /* s->match_start = cur_match; */ | ||
308 | /* best_len = len; */ | ||
309 | /* if (len >= nice_match) break; */ | ||
310 | /* scan_end = *(ushf*)(scan+best_len-1); */ | ||
311 | |||
312 | LongerMatch: movl nicematch(%esp), %ebx | ||
313 | movl %eax, bestlen(%esp) | ||
314 | movl %ecx, dsMatchStart(%edx) | ||
315 | cmpl %ebx, %eax | ||
316 | jge LeaveNow | ||
317 | movl window(%esp), %esi | ||
318 | addl %eax, %esi | ||
319 | movl %esi, windowbestlen(%esp) | ||
320 | movzwl -1(%edi,%eax), %ebx | ||
321 | movl dsPrev(%edx), %edi | ||
322 | movl %ebx, scanend(%esp) | ||
323 | movl chainlenwmask(%esp), %edx | ||
324 | jmp LookupLoop | ||
325 | |||
326 | /* Accept the current string, with the maximum possible length. */ | ||
327 | |||
328 | LenMaximum: movl deflatestate(%esp), %edx | ||
329 | movl $MAX_MATCH, bestlen(%esp) | ||
330 | movl %ecx, dsMatchStart(%edx) | ||
331 | |||
332 | /* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */ | ||
333 | /* return s->lookahead; */ | ||
334 | |||
335 | LeaveNow: | ||
336 | movl deflatestate(%esp), %edx | ||
337 | movl bestlen(%esp), %ebx | ||
338 | movl dsLookahead(%edx), %eax | ||
339 | cmpl %eax, %ebx | ||
340 | jg LookaheadRet | ||
341 | movl %ebx, %eax | ||
342 | LookaheadRet: | ||
343 | |||
344 | /* Restore the stack and return from whence we came. */ | ||
345 | |||
346 | addl $LocalVarsSize, %esp | ||
347 | .cfi_def_cfa_offset 20 | ||
348 | popl %ebx | ||
349 | .cfi_def_cfa_offset 16 | ||
350 | popl %esi | ||
351 | .cfi_def_cfa_offset 12 | ||
352 | popl %edi | ||
353 | .cfi_def_cfa_offset 8 | ||
354 | popl %ebp | ||
355 | .cfi_def_cfa_offset 4 | ||
356 | .cfi_endproc | ||
357 | match_init: ret | ||