diff options
-rw-r--r-- | archival/libarchive/bz/compress.c | 75 |
1 files changed, 43 insertions, 32 deletions
diff --git a/archival/libarchive/bz/compress.c b/archival/libarchive/bz/compress.c index f65076758..462740b6c 100644 --- a/archival/libarchive/bz/compress.c +++ b/archival/libarchive/bz/compress.c | |||
@@ -158,6 +158,38 @@ void makeMaps_e(EState* s) | |||
158 | 158 | ||
159 | 159 | ||
160 | /*---------------------------------------------------*/ | 160 | /*---------------------------------------------------*/ |
161 | /* | ||
162 | * This bit of code is performance-critical. | ||
163 | * On 32bit x86, gcc-6.3.0 was observed to spill ryy_j to stack, | ||
164 | * resulting in abysmal performance (x3 slowdown). | ||
165 | * Forcing it into a separate function alleviates register pressure, | ||
166 | * and spillage no longer happens. | ||
167 | * Other versions of gcc do not exhibit this problem, but out-of-line code | ||
168 | * seems to be helping them too (code is both smaller and faster). | ||
169 | * Therefore NOINLINE is enabled for the entire 32bit x86 arch for now, | ||
170 | * without a check for gcc version. | ||
171 | */ | ||
172 | static | ||
173 | #if defined __i386__ | ||
174 | NOINLINE | ||
175 | #endif | ||
176 | int inner_loop(uint8_t *yy, uint8_t ll_i) | ||
177 | { | ||
178 | register uint8_t rtmp; | ||
179 | register uint8_t* ryy_j; | ||
180 | rtmp = yy[1]; | ||
181 | yy[1] = yy[0]; | ||
182 | ryy_j = &(yy[1]); | ||
183 | while (ll_i != rtmp) { | ||
184 | register uint8_t rtmp2; | ||
185 | ryy_j++; | ||
186 | rtmp2 = rtmp; | ||
187 | rtmp = *ryy_j; | ||
188 | *ryy_j = rtmp2; | ||
189 | } | ||
190 | yy[0] = rtmp; | ||
191 | return ryy_j - &(yy[0]); | ||
192 | } | ||
161 | static NOINLINE | 193 | static NOINLINE |
162 | void generateMTFValues(EState* s) | 194 | void generateMTFValues(EState* s) |
163 | { | 195 | { |
@@ -165,7 +197,6 @@ void generateMTFValues(EState* s) | |||
165 | int i; | 197 | int i; |
166 | int zPend; | 198 | int zPend; |
167 | int32_t wr; | 199 | int32_t wr; |
168 | int32_t EOB; | ||
169 | 200 | ||
170 | /* | 201 | /* |
171 | * After sorting (eg, here), | 202 | * After sorting (eg, here), |
@@ -189,15 +220,12 @@ void generateMTFValues(EState* s) | |||
189 | * compressBlock(). | 220 | * compressBlock(). |
190 | */ | 221 | */ |
191 | uint32_t* ptr = s->ptr; | 222 | uint32_t* ptr = s->ptr; |
192 | uint8_t* block = s->block; | ||
193 | uint16_t* mtfv = s->mtfv; | ||
194 | 223 | ||
195 | makeMaps_e(s); | 224 | makeMaps_e(s); |
196 | EOB = s->nInUse+1; | ||
197 | 225 | ||
198 | wr = 0; | 226 | wr = 0; |
199 | zPend = 0; | 227 | zPend = 0; |
200 | for (i = 0; i <= EOB; i++) | 228 | for (i = 0; i <= s->nInUse+1; i++) |
201 | s->mtfFreq[i] = 0; | 229 | s->mtfFreq[i] = 0; |
202 | 230 | ||
203 | for (i = 0; i < s->nInUse; i++) | 231 | for (i = 0; i < s->nInUse; i++) |
@@ -211,7 +239,7 @@ void generateMTFValues(EState* s) | |||
211 | j = ptr[i] - 1; | 239 | j = ptr[i] - 1; |
212 | if (j < 0) | 240 | if (j < 0) |
213 | j += s->nblock; | 241 | j += s->nblock; |
214 | ll_i = s->unseqToSeq[block[j]]; | 242 | ll_i = s->unseqToSeq[s->block[j]]; |
215 | AssertD(ll_i < s->nInUse, "generateMTFValues(2a)"); | 243 | AssertD(ll_i < s->nInUse, "generateMTFValues(2a)"); |
216 | 244 | ||
217 | if (yy[0] == ll_i) { | 245 | if (yy[0] == ll_i) { |
@@ -225,15 +253,15 @@ void generateMTFValues(EState* s) | |||
225 | while (1) { | 253 | while (1) { |
226 | #if 0 | 254 | #if 0 |
227 | if (zPend & 1) { | 255 | if (zPend & 1) { |
228 | mtfv[wr] = BZ_RUNB; wr++; | 256 | s->mtfv[wr] = BZ_RUNB; wr++; |
229 | s->mtfFreq[BZ_RUNB]++; | 257 | s->mtfFreq[BZ_RUNB]++; |
230 | } else { | 258 | } else { |
231 | mtfv[wr] = BZ_RUNA; wr++; | 259 | s->mtfv[wr] = BZ_RUNA; wr++; |
232 | s->mtfFreq[BZ_RUNA]++; | 260 | s->mtfFreq[BZ_RUNA]++; |
233 | } | 261 | } |
234 | #else /* same as above, since BZ_RUNA is 0 and BZ_RUNB is 1 */ | 262 | #else /* same as above, since BZ_RUNA is 0 and BZ_RUNB is 1 */ |
235 | unsigned run = zPend & 1; | 263 | unsigned run = zPend & 1; |
236 | mtfv[wr] = run; | 264 | s->mtfv[wr] = run; |
237 | wr++; | 265 | wr++; |
238 | s->mtfFreq[run]++; | 266 | s->mtfFreq[run]++; |
239 | #endif | 267 | #endif |
@@ -247,36 +275,19 @@ void generateMTFValues(EState* s) | |||
247 | goto end; | 275 | goto end; |
248 | zPend = 0; | 276 | zPend = 0; |
249 | } | 277 | } |
250 | { | 278 | j = inner_loop(yy, ll_i); |
251 | register uint8_t rtmp; | 279 | s->mtfv[wr] = j+1; |
252 | register uint8_t* ryy_j; | 280 | wr++; |
253 | register uint8_t rll_i; | 281 | s->mtfFreq[j+1]++; |
254 | rtmp = yy[1]; | ||
255 | yy[1] = yy[0]; | ||
256 | ryy_j = &(yy[1]); | ||
257 | rll_i = ll_i; | ||
258 | while (rll_i != rtmp) { | ||
259 | register uint8_t rtmp2; | ||
260 | ryy_j++; | ||
261 | rtmp2 = rtmp; | ||
262 | rtmp = *ryy_j; | ||
263 | *ryy_j = rtmp2; | ||
264 | } | ||
265 | yy[0] = rtmp; | ||
266 | j = ryy_j - &(yy[0]); | ||
267 | mtfv[wr] = j+1; | ||
268 | wr++; | ||
269 | s->mtfFreq[j+1]++; | ||
270 | } | ||
271 | } | 282 | } |
272 | 283 | ||
273 | i = -1; | 284 | i = -1; |
274 | if (zPend > 0) | 285 | if (zPend > 0) |
275 | goto process_zPend; /* "process it and come back here" */ | 286 | goto process_zPend; /* "process it and come back here" */ |
276 | end: | 287 | end: |
277 | mtfv[wr] = EOB; | 288 | s->mtfv[wr] = s->nInUse+1; |
278 | wr++; | 289 | wr++; |
279 | s->mtfFreq[EOB]++; | 290 | s->mtfFreq[s->nInUse+1]++; |
280 | 291 | ||
281 | s->nMTF = wr; | 292 | s->nMTF = wr; |
282 | } | 293 | } |