aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--archival/libarchive/bz/compress.c75
1 files changed, 43 insertions, 32 deletions
diff --git a/archival/libarchive/bz/compress.c b/archival/libarchive/bz/compress.c
index f65076758..462740b6c 100644
--- a/archival/libarchive/bz/compress.c
+++ b/archival/libarchive/bz/compress.c
@@ -158,6 +158,38 @@ void makeMaps_e(EState* s)
158 158
159 159
160/*---------------------------------------------------*/ 160/*---------------------------------------------------*/
161/*
162 * This bit of code is performance-critical.
163 * On 32bit x86, gcc-6.3.0 was observed to spill ryy_j to stack,
164 * resulting in abysmal performance (x3 slowdown).
165 * Forcing it into a separate function alleviates register pressure,
166 * and spillage no longer happens.
167 * Other versions of gcc do not exhibit this problem, but out-of-line code
168 * seems to be helping them too (code is both smaller and faster).
169 * Therefore NOINLINE is enabled for the entire 32bit x86 arch for now,
170 * without a check for gcc version.
171 */
172static
173#if defined __i386__
174NOINLINE
175#endif
176int inner_loop(uint8_t *yy, uint8_t ll_i)
177{
178 register uint8_t rtmp;
179 register uint8_t* ryy_j;
180 rtmp = yy[1];
181 yy[1] = yy[0];
182 ryy_j = &(yy[1]);
183 while (ll_i != rtmp) {
184 register uint8_t rtmp2;
185 ryy_j++;
186 rtmp2 = rtmp;
187 rtmp = *ryy_j;
188 *ryy_j = rtmp2;
189 }
190 yy[0] = rtmp;
191 return ryy_j - &(yy[0]);
192}
161static NOINLINE 193static NOINLINE
162void generateMTFValues(EState* s) 194void generateMTFValues(EState* s)
163{ 195{
@@ -165,7 +197,6 @@ void generateMTFValues(EState* s)
165 int i; 197 int i;
166 int zPend; 198 int zPend;
167 int32_t wr; 199 int32_t wr;
168 int32_t EOB;
169 200
170 /* 201 /*
171 * After sorting (eg, here), 202 * After sorting (eg, here),
@@ -189,15 +220,12 @@ void generateMTFValues(EState* s)
189 * compressBlock(). 220 * compressBlock().
190 */ 221 */
191 uint32_t* ptr = s->ptr; 222 uint32_t* ptr = s->ptr;
192 uint8_t* block = s->block;
193 uint16_t* mtfv = s->mtfv;
194 223
195 makeMaps_e(s); 224 makeMaps_e(s);
196 EOB = s->nInUse+1;
197 225
198 wr = 0; 226 wr = 0;
199 zPend = 0; 227 zPend = 0;
200 for (i = 0; i <= EOB; i++) 228 for (i = 0; i <= s->nInUse+1; i++)
201 s->mtfFreq[i] = 0; 229 s->mtfFreq[i] = 0;
202 230
203 for (i = 0; i < s->nInUse; i++) 231 for (i = 0; i < s->nInUse; i++)
@@ -211,7 +239,7 @@ void generateMTFValues(EState* s)
211 j = ptr[i] - 1; 239 j = ptr[i] - 1;
212 if (j < 0) 240 if (j < 0)
213 j += s->nblock; 241 j += s->nblock;
214 ll_i = s->unseqToSeq[block[j]]; 242 ll_i = s->unseqToSeq[s->block[j]];
215 AssertD(ll_i < s->nInUse, "generateMTFValues(2a)"); 243 AssertD(ll_i < s->nInUse, "generateMTFValues(2a)");
216 244
217 if (yy[0] == ll_i) { 245 if (yy[0] == ll_i) {
@@ -225,15 +253,15 @@ void generateMTFValues(EState* s)
225 while (1) { 253 while (1) {
226#if 0 254#if 0
227 if (zPend & 1) { 255 if (zPend & 1) {
228 mtfv[wr] = BZ_RUNB; wr++; 256 s->mtfv[wr] = BZ_RUNB; wr++;
229 s->mtfFreq[BZ_RUNB]++; 257 s->mtfFreq[BZ_RUNB]++;
230 } else { 258 } else {
231 mtfv[wr] = BZ_RUNA; wr++; 259 s->mtfv[wr] = BZ_RUNA; wr++;
232 s->mtfFreq[BZ_RUNA]++; 260 s->mtfFreq[BZ_RUNA]++;
233 } 261 }
234#else /* same as above, since BZ_RUNA is 0 and BZ_RUNB is 1 */ 262#else /* same as above, since BZ_RUNA is 0 and BZ_RUNB is 1 */
235 unsigned run = zPend & 1; 263 unsigned run = zPend & 1;
236 mtfv[wr] = run; 264 s->mtfv[wr] = run;
237 wr++; 265 wr++;
238 s->mtfFreq[run]++; 266 s->mtfFreq[run]++;
239#endif 267#endif
@@ -247,36 +275,19 @@ void generateMTFValues(EState* s)
247 goto end; 275 goto end;
248 zPend = 0; 276 zPend = 0;
249 } 277 }
250 { 278 j = inner_loop(yy, ll_i);
251 register uint8_t rtmp; 279 s->mtfv[wr] = j+1;
252 register uint8_t* ryy_j; 280 wr++;
253 register uint8_t rll_i; 281 s->mtfFreq[j+1]++;
254 rtmp = yy[1];
255 yy[1] = yy[0];
256 ryy_j = &(yy[1]);
257 rll_i = ll_i;
258 while (rll_i != rtmp) {
259 register uint8_t rtmp2;
260 ryy_j++;
261 rtmp2 = rtmp;
262 rtmp = *ryy_j;
263 *ryy_j = rtmp2;
264 }
265 yy[0] = rtmp;
266 j = ryy_j - &(yy[0]);
267 mtfv[wr] = j+1;
268 wr++;
269 s->mtfFreq[j+1]++;
270 }
271 } 282 }
272 283
273 i = -1; 284 i = -1;
274 if (zPend > 0) 285 if (zPend > 0)
275 goto process_zPend; /* "process it and come back here" */ 286 goto process_zPend; /* "process it and come back here" */
276 end: 287 end:
277 mtfv[wr] = EOB; 288 s->mtfv[wr] = s->nInUse+1;
278 wr++; 289 wr++;
279 s->mtfFreq[EOB]++; 290 s->mtfFreq[s->nInUse+1]++;
280 291
281 s->nMTF = wr; 292 s->nMTF = wr;
282} 293}