diff options
author | Mark Adler <madler@alumni.caltech.edu> | 2011-09-09 23:24:43 -0700 |
---|---|---|
committer | Mark Adler <madler@alumni.caltech.edu> | 2011-09-09 23:24:43 -0700 |
commit | 6b8233bfe00e79134cb1b84fc49d4f750a797f79 (patch) | |
tree | ca2b03b0169568681dc3d9c823e9f0bc4417d6b5 /contrib/masmx86/gvmat32c.c | |
parent | 0484693e1723bbab791c56f95597bd7dbe867d03 (diff) | |
download | zlib-6b8233bfe00e79134cb1b84fc49d4f750a797f79.tar.gz zlib-6b8233bfe00e79134cb1b84fc49d4f750a797f79.tar.bz2 zlib-6b8233bfe00e79134cb1b84fc49d4f750a797f79.zip |
zlib 1.2.2.3v1.2.2.3
Diffstat (limited to 'contrib/masmx86/gvmat32c.c')
-rw-r--r-- | contrib/masmx86/gvmat32c.c | 268 |
1 files changed, 62 insertions, 206 deletions
diff --git a/contrib/masmx86/gvmat32c.c b/contrib/masmx86/gvmat32c.c index 9ed25f3..7ad2b27 100644 --- a/contrib/masmx86/gvmat32c.c +++ b/contrib/masmx86/gvmat32c.c | |||
@@ -1,206 +1,62 @@ | |||
1 | /* gvmat32.c -- C portion of the optimized longest_match for 32 bits x86 | 1 | /* gvmat32.c -- C portion of the optimized longest_match for 32 bits x86 |
2 | * Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant. | 2 | * Copyright (C) 1995-1996 Jean-loup Gailly and Gilles Vollant. |
3 | * File written by Gilles Vollant, by modifiying the longest_match | 3 | * File written by Gilles Vollant, by modifiying the longest_match |
4 | * from Jean-loup Gailly in deflate.c | 4 | * from Jean-loup Gailly in deflate.c |
5 | * it prepare all parameters and call the assembly longest_match_gvasm | 5 | * it prepare all parameters and call the assembly longest_match_gvasm |
6 | * longest_match execute standard C code is wmask != 0x7fff | 6 | * longest_match execute standard C code is wmask != 0x7fff |
7 | * (assembly code is faster with a fixed wmask) | 7 | * (assembly code is faster with a fixed wmask) |
8 | * | 8 | * |
9 | */ | 9 | * Read comment at beginning of gvmat32.asm for more information |
10 | 10 | */ | |
11 | #include "deflate.h" | 11 | |
12 | 12 | #if defined(ASMV) && (!defined(NOOLDPENTIUMCODE)) | |
13 | #ifdef ASMV | 13 | #include "deflate.h" |
14 | #define NIL 0 | 14 | |
15 | 15 | /* if your C compiler don't add underline before function name, | |
16 | #define UNALIGNED_OK | 16 | define ADD_UNDERLINE_ASMFUNC */ |
17 | 17 | #ifdef ADD_UNDERLINE_ASMFUNC | |
18 | 18 | #define longest_match_7fff _longest_match_7fff | |
19 | /* if your C compiler don't add underline before function name, | 19 | #define longest_match_686 _longest_match_686 |
20 | define ADD_UNDERLINE_ASMFUNC */ | 20 | #define cpudetect32 _cpudetect32 |
21 | #ifdef ADD_UNDERLINE_ASMFUNC | 21 | #endif |
22 | #define longest_match_7fff _longest_match_7fff | 22 | |
23 | #define longest_match_686 _longest_match_686 | 23 | |
24 | #define cpudetect32 _cpudetect32 | 24 | unsigned long cpudetect32(); |
25 | #endif | 25 | |
26 | 26 | uInt longest_match_c( | |
27 | 27 | deflate_state *s, | |
28 | 28 | IPos cur_match); /* current match */ | |
29 | void match_init() | 29 | |
30 | { | 30 | |
31 | } | 31 | uInt longest_match_7fff( |
32 | 32 | deflate_state *s, | |
33 | unsigned long cpudetect32(); | 33 | IPos cur_match); /* current match */ |
34 | 34 | ||
35 | uInt longest_match_c( | 35 | uInt longest_match_686( |
36 | deflate_state *s, | 36 | deflate_state *s, |
37 | IPos cur_match); /* current match */ | 37 | IPos cur_match); /* current match */ |
38 | 38 | ||
39 | 39 | ||
40 | uInt longest_match_7fff( | 40 | static uInt iIsPPro=2; |
41 | deflate_state *s, | 41 | |
42 | IPos cur_match); /* current match */ | 42 | void match_init () |
43 | 43 | { | |
44 | uInt longest_match_686( | 44 | iIsPPro = (((cpudetect32()/0x100)&0xf)>=6) ? 1 : 0; |
45 | deflate_state *s, | 45 | } |
46 | IPos cur_match); /* current match */ | 46 | |
47 | 47 | uInt longest_match( | |
48 | uInt longest_match( | 48 | deflate_state *s, |
49 | deflate_state *s, | 49 | IPos cur_match) /* current match */ |
50 | IPos cur_match) /* current match */ | 50 | { |
51 | { | 51 | if (iIsPPro!=0) |
52 | static uInt iIsPPro=2; | 52 | return longest_match_686(s,cur_match); |
53 | 53 | ||
54 | if ((s->w_mask == 0x7fff) && (iIsPPro==0)) | 54 | if (s->w_mask != 0x7fff) |
55 | return longest_match_7fff(s,cur_match); | 55 | return longest_match_686(s,cur_match); |
56 | 56 | ||
57 | if (iIsPPro==1) | 57 | /* now ((s->w_mask == 0x7fff) && (iIsPPro==0)) */ |
58 | return longest_match_686(s,cur_match); | 58 | return longest_match_7fff(s,cur_match); |
59 | 59 | } | |
60 | if (iIsPPro==2) | 60 | |
61 | iIsPPro = (((cpudetect32()/0x100)&0xf)>=6) ? 1 : 0; | 61 | |
62 | 62 | #endif /* defined(ASMV) && (!defined(NOOLDPENTIUMCODE)) */ | |
63 | return longest_match_c(s,cur_match); | ||
64 | } | ||
65 | |||
66 | |||
67 | |||
68 | uInt longest_match_c(s, cur_match) | ||
69 | deflate_state *s; | ||
70 | IPos cur_match; /* current match */ | ||
71 | { | ||
72 | unsigned chain_length = s->max_chain_length;/* max hash chain length */ | ||
73 | register Bytef *scan = s->window + s->strstart; /* current string */ | ||
74 | register Bytef *match; /* matched string */ | ||
75 | register int len; /* length of current match */ | ||
76 | int best_len = s->prev_length; /* best match length so far */ | ||
77 | int nice_match = s->nice_match; /* stop if match long enough */ | ||
78 | IPos limit = s->strstart > (IPos)MAX_DIST(s) ? | ||
79 | s->strstart - (IPos)MAX_DIST(s) : NIL; | ||
80 | /* Stop when cur_match becomes <= limit. To simplify the code, | ||
81 | * we prevent matches with the string of window index 0. | ||
82 | */ | ||
83 | Posf *prev = s->prev; | ||
84 | uInt wmask = s->w_mask; | ||
85 | |||
86 | #ifdef UNALIGNED_OK | ||
87 | /* Compare two bytes at a time. Note: this is not always beneficial. | ||
88 | * Try with and without -DUNALIGNED_OK to check. | ||
89 | */ | ||
90 | register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; | ||
91 | register ush scan_start = *(ushf*)scan; | ||
92 | register ush scan_end = *(ushf*)(scan+best_len-1); | ||
93 | #else | ||
94 | register Bytef *strend = s->window + s->strstart + MAX_MATCH; | ||
95 | register Byte scan_end1 = scan[best_len-1]; | ||
96 | register Byte scan_end = scan[best_len]; | ||
97 | #endif | ||
98 | |||
99 | /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. | ||
100 | * It is easy to get rid of this optimization if necessary. | ||
101 | */ | ||
102 | Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); | ||
103 | |||
104 | /* Do not waste too much time if we already have a good match: */ | ||
105 | if (s->prev_length >= s->good_match) { | ||
106 | chain_length >>= 2; | ||
107 | } | ||
108 | /* Do not look for matches beyond the end of the input. This is necessary | ||
109 | * to make deflate deterministic. | ||
110 | */ | ||
111 | if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; | ||
112 | |||
113 | Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); | ||
114 | |||
115 | do { | ||
116 | Assert(cur_match < s->strstart, "no future"); | ||
117 | match = s->window + cur_match; | ||
118 | |||
119 | /* Skip to next match if the match length cannot increase | ||
120 | * or if the match length is less than 2: | ||
121 | */ | ||
122 | #if (defined(UNALIGNED_OK) && MAX_MATCH == 258) | ||
123 | /* This code assumes sizeof(unsigned short) == 2. Do not use | ||
124 | * UNALIGNED_OK if your compiler uses a different size. | ||
125 | */ | ||
126 | if (*(ushf*)(match+best_len-1) != scan_end || | ||
127 | *(ushf*)match != scan_start) continue; | ||
128 | |||
129 | /* It is not necessary to compare scan[2] and match[2] since they are | ||
130 | * always equal when the other bytes match, given that the hash keys | ||
131 | * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at | ||
132 | * strstart+3, +5, ... up to strstart+257. We check for insufficient | ||
133 | * lookahead only every 4th comparison; the 128th check will be made | ||
134 | * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is | ||
135 | * necessary to put more guard bytes at the end of the window, or | ||
136 | * to check more often for insufficient lookahead. | ||
137 | */ | ||
138 | Assert(scan[2] == match[2], "scan[2]?"); | ||
139 | scan++, match++; | ||
140 | do { | ||
141 | } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) && | ||
142 | *(ushf*)(scan+=2) == *(ushf*)(match+=2) && | ||
143 | *(ushf*)(scan+=2) == *(ushf*)(match+=2) && | ||
144 | *(ushf*)(scan+=2) == *(ushf*)(match+=2) && | ||
145 | scan < strend); | ||
146 | /* The funny "do {}" generates better code on most compilers */ | ||
147 | |||
148 | /* Here, scan <= window+strstart+257 */ | ||
149 | Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); | ||
150 | if (*scan == *match) scan++; | ||
151 | |||
152 | len = (MAX_MATCH - 1) - (int)(strend-scan); | ||
153 | scan = strend - (MAX_MATCH-1); | ||
154 | |||
155 | #else /* UNALIGNED_OK */ | ||
156 | |||
157 | if (match[best_len] != scan_end || | ||
158 | match[best_len-1] != scan_end1 || | ||
159 | *match != *scan || | ||
160 | *++match != scan[1]) continue; | ||
161 | |||
162 | /* The check at best_len-1 can be removed because it will be made | ||
163 | * again later. (This heuristic is not always a win.) | ||
164 | * It is not necessary to compare scan[2] and match[2] since they | ||
165 | * are always equal when the other bytes match, given that | ||
166 | * the hash keys are equal and that HASH_BITS >= 8. | ||
167 | */ | ||
168 | scan += 2, match++; | ||
169 | Assert(*scan == *match, "match[2]?"); | ||
170 | |||
171 | /* We check for insufficient lookahead only every 8th comparison; | ||
172 | * the 256th check will be made at strstart+258. | ||
173 | */ | ||
174 | do { | ||
175 | } while (*++scan == *++match && *++scan == *++match && | ||
176 | *++scan == *++match && *++scan == *++match && | ||
177 | *++scan == *++match && *++scan == *++match && | ||
178 | *++scan == *++match && *++scan == *++match && | ||
179 | scan < strend); | ||
180 | |||
181 | Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); | ||
182 | |||
183 | len = MAX_MATCH - (int)(strend - scan); | ||
184 | scan = strend - MAX_MATCH; | ||
185 | |||
186 | #endif /* UNALIGNED_OK */ | ||
187 | |||
188 | if (len > best_len) { | ||
189 | s->match_start = cur_match; | ||
190 | best_len = len; | ||
191 | if (len >= nice_match) break; | ||
192 | #ifdef UNALIGNED_OK | ||
193 | scan_end = *(ushf*)(scan+best_len-1); | ||
194 | #else | ||
195 | scan_end1 = scan[best_len-1]; | ||
196 | scan_end = scan[best_len]; | ||
197 | #endif | ||
198 | } | ||
199 | } while ((cur_match = prev[cur_match & wmask]) > limit | ||
200 | && --chain_length != 0); | ||
201 | |||
202 | if ((uInt)best_len <= s->lookahead) return (uInt)best_len; | ||
203 | return s->lookahead; | ||
204 | } | ||
205 | |||
206 | #endif /* ASMV */ | ||