aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenys Vlasenko <dvlasenk@redhat.com>2010-10-30 00:54:10 +0200
committerDenys Vlasenko <dvlasenk@redhat.com>2010-10-30 00:54:10 +0200
commit0c576975c87c543522ab31566139c078429dff38 (patch)
tree5d9b833809c3c88f3e626cd8b36b57b41f491205
parent5d49b72f1a3a362957171610b722ca8ec3f49734 (diff)
downloadbusybox-w32-0c576975c87c543522ab31566139c078429dff38.tar.gz
busybox-w32-0c576975c87c543522ab31566139c078429dff38.tar.bz2
busybox-w32-0c576975c87c543522ab31566139c078429dff38.zip
decompress_bunzip2: code shrink ~10 bytes
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
-rw-r--r--archival/libunarchive/decompress_bunzip2.c173
1 files changed, 99 insertions, 74 deletions
diff --git a/archival/libunarchive/decompress_bunzip2.c b/archival/libunarchive/decompress_bunzip2.c
index 2fa0f898a..2d4867220 100644
--- a/archival/libunarchive/decompress_bunzip2.c
+++ b/archival/libunarchive/decompress_bunzip2.c
@@ -151,8 +151,8 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted)
151static int get_next_block(bunzip_data *bd) 151static int get_next_block(bunzip_data *bd)
152{ 152{
153 struct group_data *hufGroup; 153 struct group_data *hufGroup;
154 int dbufCount, nextSym, dbufSize, groupCount, *base, *limit, selector, 154 int dbufCount, dbufSize, groupCount, *base, *limit, selector,
155 i, j, k, t, runPos, symCount, symTotal, nSelectors, byteCount[256]; 155 i, j, t, runPos, symCount, symTotal, nSelectors, byteCount[256];
156 uint8_t uc, symToByte[256], mtfSymbol[256], *selectors; 156 uint8_t uc, symToByte[256], mtfSymbol[256], *selectors;
157 uint32_t *dbuf; 157 uint32_t *dbuf;
158 unsigned origPtr; 158 unsigned origPtr;
@@ -161,9 +161,12 @@ static int get_next_block(bunzip_data *bd)
161 dbufSize = bd->dbufSize; 161 dbufSize = bd->dbufSize;
162 selectors = bd->selectors; 162 selectors = bd->selectors;
163 163
164/* In bbox, we are ok with aborting through setjmp which is set up in start_bunzip */
165#if 0
164 /* Reset longjmp I/O error handling */ 166 /* Reset longjmp I/O error handling */
165 i = setjmp(bd->jmpbuf); 167 i = setjmp(bd->jmpbuf);
166 if (i) return i; 168 if (i) return i;
169#endif
167 170
168 /* Read in header signature and CRC, then validate signature. 171 /* Read in header signature and CRC, then validate signature.
169 (last block signature means CRC is for whole file, return now) */ 172 (last block signature means CRC is for whole file, return now) */
@@ -185,16 +188,23 @@ static int get_next_block(bunzip_data *bd)
185 symbols to deal with, and writes a sparse bitfield indicating which 188 symbols to deal with, and writes a sparse bitfield indicating which
186 values were present. We make a translation table to convert the symbols 189 values were present. We make a translation table to convert the symbols
187 back to the corresponding bytes. */ 190 back to the corresponding bytes. */
188 t = get_bits(bd, 16);
189 symTotal = 0; 191 symTotal = 0;
190 for (i = 0; i < 16; i++) { 192 i = 0;
191 if (t & (1 << (15-i))) { 193 t = get_bits(bd, 16);
192 k = get_bits(bd, 16); 194 do {
193 for (j = 0; j < 16; j++) 195 if (t & (1 << 15)) {
194 if (k & (1 << (15-j))) 196 unsigned inner_map = get_bits(bd, 16);
195 symToByte[symTotal++] = (16*i) + j; 197 do {
198 if (inner_map & (1 << 15))
199 symToByte[symTotal++] = i;
200 inner_map <<= 1;
201 i++;
202 } while (i & 15);
203 i -= 16;
196 } 204 }
197 } 205 t <<= 1;
206 i += 16;
207 } while (i < 256);
198 208
199 /* How many different Huffman coding groups does this block use? */ 209 /* How many different Huffman coding groups does this block use? */
200 groupCount = get_bits(bd, 3); 210 groupCount = get_bits(bd, 3);
@@ -205,20 +215,24 @@ static int get_next_block(bunzip_data *bd)
205 group. Read in the group selector list, which is stored as MTF encoded 215 group. Read in the group selector list, which is stored as MTF encoded
206 bit runs. (MTF=Move To Front, as each value is used it's moved to the 216 bit runs. (MTF=Move To Front, as each value is used it's moved to the
207 start of the list.) */ 217 start of the list.) */
218 for (i = 0; i < groupCount; i++)
219 mtfSymbol[i] = i;
208 nSelectors = get_bits(bd, 15); 220 nSelectors = get_bits(bd, 15);
209 if (!nSelectors) return RETVAL_DATA_ERROR; 221 if (!nSelectors)
210 for (i = 0; i < groupCount; i++) mtfSymbol[i] = i; 222 return RETVAL_DATA_ERROR;
211 for (i = 0; i < nSelectors; i++) { 223 for (i = 0; i < nSelectors; i++) {
212 224 uint8_t tmp_byte;
213 /* Get next value */ 225 /* Get next value */
214 for (j = 0; get_bits(bd, 1); j++) 226 int n = 0;
215 if (j >= groupCount) return RETVAL_DATA_ERROR; 227 while (get_bits(bd, 1)) {
216 228 if (n >= groupCount) return RETVAL_DATA_ERROR;
229 n++;
230 }
217 /* Decode MTF to get the next selector */ 231 /* Decode MTF to get the next selector */
218 uc = mtfSymbol[j]; 232 tmp_byte = mtfSymbol[n];
219 for (; j; j--) 233 while (--n >= 0)
220 mtfSymbol[j] = mtfSymbol[j-1]; 234 mtfSymbol[n + 1] = mtfSymbol[n];
221 mtfSymbol[0] = selectors[i] = uc; 235 mtfSymbol[0] = selectors[i] = tmp_byte;
222 } 236 }
223 237
224 /* Read the Huffman coding tables for each group, which code for symTotal 238 /* Read the Huffman coding tables for each group, which code for symTotal
@@ -239,20 +253,21 @@ static int get_next_block(bunzip_data *bd)
239 t = get_bits(bd, 5) - 1; 253 t = get_bits(bd, 5) - 1;
240 for (i = 0; i < symCount; i++) { 254 for (i = 0; i < symCount; i++) {
241 for (;;) { 255 for (;;) {
256 int two_bits;
242 if ((unsigned)t > (MAX_HUFCODE_BITS-1)) 257 if ((unsigned)t > (MAX_HUFCODE_BITS-1))
243 return RETVAL_DATA_ERROR; 258 return RETVAL_DATA_ERROR;
244 259
245 /* If first bit is 0, stop. Else second bit indicates whether 260 /* If first bit is 0, stop. Else second bit indicates whether
246 to increment or decrement the value. Optimization: grab 2 261 to increment or decrement the value. Optimization: grab 2
247 bits and unget the second if the first was 0. */ 262 bits and unget the second if the first was 0. */
248 k = get_bits(bd, 2); 263 two_bits = get_bits(bd, 2);
249 if (k < 2) { 264 if (two_bits < 2) {
250 bd->inbufBitCount++; 265 bd->inbufBitCount++;
251 break; 266 break;
252 } 267 }
253 268
254 /* Add one if second bit 1, else subtract 1. Avoids if/else */ 269 /* Add one if second bit 1, else subtract 1. Avoids if/else */
255 t += (((k+1) & 2) - 1); 270 t += (((two_bits+1) & 2) - 1);
256 } 271 }
257 272
258 /* Correct for the initial -1, to get the final symbol length */ 273 /* Correct for the initial -1, to get the final symbol length */
@@ -282,17 +297,18 @@ static int get_next_block(bunzip_data *bd)
282 297
283 /* Note that minLen can't be smaller than 1, so we adjust the base 298 /* Note that minLen can't be smaller than 1, so we adjust the base
284 and limit array pointers so we're not always wasting the first 299 and limit array pointers so we're not always wasting the first
285 entry. We do this again when using them (during symbol decoding).*/ 300 entry. We do this again when using them (during symbol decoding). */
286 base = hufGroup->base - 1; 301 base = hufGroup->base - 1;
287 limit = hufGroup->limit - 1; 302 limit = hufGroup->limit - 1;
288 303
289 /* Calculate permute[]. Concurently, initialize temp[] and limit[]. */ 304 /* Calculate permute[]. Concurently, initialize temp[] and limit[]. */
290 pp = 0; 305 pp = 0;
291 for (i = minLen; i <= maxLen; i++) { 306 for (i = minLen; i <= maxLen; i++) {
307 int k;
292 temp[i] = limit[i] = 0; 308 temp[i] = limit[i] = 0;
293 for (t = 0; t < symCount; t++) 309 for (k = 0; k < symCount; k++)
294 if (length[t] == i) 310 if (length[k] == i)
295 hufGroup->permute[pp++] = t; 311 hufGroup->permute[pp++] = k;
296 } 312 }
297 313
298 /* Count symbols coded for at each bit length */ 314 /* Count symbols coded for at each bit length */
@@ -305,8 +321,10 @@ static int get_next_block(bunzip_data *bd)
305 * base[] (number of symbols to ignore at each bit length, which is 321 * base[] (number of symbols to ignore at each bit length, which is
306 * limit minus the cumulative count of symbols coded for already). */ 322 * limit minus the cumulative count of symbols coded for already). */
307 pp = t = 0; 323 pp = t = 0;
308 for (i = minLen; i < maxLen; i++) { 324 for (i = minLen; i < maxLen;) {
309 pp += temp[i]; 325 unsigned temp_i = temp[i];
326
327 pp += temp_i;
310 328
311 /* We read the largest possible symbol size and then unget bits 329 /* We read the largest possible symbol size and then unget bits
312 after determining how many we need, and those extra bits could 330 after determining how many we need, and those extra bits could
@@ -316,8 +334,8 @@ static int get_next_block(bunzip_data *bd)
316 don't affect the value>limit[length] comparison. */ 334 don't affect the value>limit[length] comparison. */
317 limit[i] = (pp << (maxLen - i)) - 1; 335 limit[i] = (pp << (maxLen - i)) - 1;
318 pp <<= 1; 336 pp <<= 1;
319 t += temp[i]; 337 t += temp_i;
320 base[i+1] = pp - t; 338 base[++i] = pp - t;
321 } 339 }
322 limit[maxLen+1] = INT_MAX; /* Sentinel value for reading next sym. */ 340 limit[maxLen+1] = INT_MAX; /* Sentinel value for reading next sym. */
323 limit[maxLen] = pp + temp[maxLen] - 1; 341 limit[maxLen] = pp + temp[maxLen] - 1;
@@ -329,9 +347,9 @@ static int get_next_block(bunzip_data *bd)
329 and run length encoding, saving the result into dbuf[dbufCount++] = uc */ 347 and run length encoding, saving the result into dbuf[dbufCount++] = uc */
330 348
331 /* Initialize symbol occurrence counters and symbol Move To Front table */ 349 /* Initialize symbol occurrence counters and symbol Move To Front table */
332 memset(byteCount, 0, sizeof(byteCount)); /* smaller, maybe slower? */ 350 /*memset(byteCount, 0, sizeof(byteCount)); - smaller, but slower */
333 for (i = 0; i < 256; i++) { 351 for (i = 0; i < 256; i++) {
334 //byteCount[i] = 0; 352 byteCount[i] = 0;
335 mtfSymbol[i] = (uint8_t)i; 353 mtfSymbol[i] = (uint8_t)i;
336 } 354 }
337 355
@@ -339,6 +357,7 @@ static int get_next_block(bunzip_data *bd)
339 357
340 runPos = dbufCount = selector = 0; 358 runPos = dbufCount = selector = 0;
341 for (;;) { 359 for (;;) {
360 int nextSym;
342 361
343 /* Fetch next Huffman coding group from list. */ 362 /* Fetch next Huffman coding group from list. */
344 symCount = GROUP_SIZE - 1; 363 symCount = GROUP_SIZE - 1;
@@ -346,44 +365,49 @@ static int get_next_block(bunzip_data *bd)
346 hufGroup = bd->groups + selectors[selector++]; 365 hufGroup = bd->groups + selectors[selector++];
347 base = hufGroup->base - 1; 366 base = hufGroup->base - 1;
348 limit = hufGroup->limit - 1; 367 limit = hufGroup->limit - 1;
349 continue_this_group:
350 368
369 continue_this_group:
351 /* Read next Huffman-coded symbol. */ 370 /* Read next Huffman-coded symbol. */
352 371
353 /* Note: It is far cheaper to read maxLen bits and back up than it is 372 /* Note: It is far cheaper to read maxLen bits and back up than it is
354 to read minLen bits and then an additional bit at a time, testing 373 to read minLen bits and then add additional bit at a time, testing
355 as we go. Because there is a trailing last block (with file CRC), 374 as we go. Because there is a trailing last block (with file CRC),
356 there is no danger of the overread causing an unexpected EOF for a 375 there is no danger of the overread causing an unexpected EOF for a
357 valid compressed file. As a further optimization, we do the read 376 valid compressed file.
358 inline (falling back to a call to get_bits if the buffer runs
359 dry). The following (up to got_huff_bits:) is equivalent to
360 j = get_bits(bd, hufGroup->maxLen);
361 */ 377 */
362 while ((int)(bd->inbufBitCount) < hufGroup->maxLen) { 378 if (1) {
363 if (bd->inbufPos == bd->inbufCount) { 379 /* As a further optimization, we do the read inline
364 j = get_bits(bd, hufGroup->maxLen); 380 (falling back to a call to get_bits if the buffer runs dry).
365 goto got_huff_bits; 381 */
366 } 382 int new_cnt;
367 bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; 383 while ((new_cnt = bd->inbufBitCount - hufGroup->maxLen) < 0) {
368 bd->inbufBitCount += 8; 384 /* bd->inbufBitCount < hufGroup->maxLen */
369 }; 385 if (bd->inbufPos == bd->inbufCount) {
370 bd->inbufBitCount -= hufGroup->maxLen; 386 nextSym = get_bits(bd, hufGroup->maxLen);
371 j = (bd->inbufBits >> bd->inbufBitCount) & ((1 << hufGroup->maxLen) - 1); 387 goto got_huff_bits;
372 388 }
373 got_huff_bits: 389 bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++];
374 390 bd->inbufBitCount += 8;
375 /* Figure how how many bits are in next symbol and unget extras */ 391 };
392 bd->inbufBitCount = new_cnt; /* "bd->inbufBitCount -= hufGroup->maxLen;" */
393 nextSym = (bd->inbufBits >> new_cnt) & ((1 << hufGroup->maxLen) - 1);
394 got_huff_bits: ;
395 } else { /* unoptimized equivalent */
396 nextSym = get_bits(bd, hufGroup->maxLen);
397 }
398 /* Figure how many bits are in next symbol and unget extras */
376 i = hufGroup->minLen; 399 i = hufGroup->minLen;
377 while (j > limit[i]) ++i; 400 while (nextSym > limit[i]) ++i;
378 bd->inbufBitCount += (hufGroup->maxLen - i); 401 j = hufGroup->maxLen - i;
402 if (j < 0)
403 return RETVAL_DATA_ERROR;
404 bd->inbufBitCount += j;
379 405
380 /* Huffman decode value to get nextSym (with bounds checking) */ 406 /* Huffman decode value to get nextSym (with bounds checking) */
381 if (i > hufGroup->maxLen) 407 nextSym = (nextSym >> j) - base[i];
408 if ((unsigned)nextSym >= MAX_SYMBOLS)
382 return RETVAL_DATA_ERROR; 409 return RETVAL_DATA_ERROR;
383 j = (j >> (hufGroup->maxLen - i)) - base[i]; 410 nextSym = hufGroup->permute[nextSym];
384 if ((unsigned)j >= MAX_SYMBOLS)
385 return RETVAL_DATA_ERROR;
386 nextSym = hufGroup->permute[j];
387 411
388 /* We have now decoded the symbol, which indicates either a new literal 412 /* We have now decoded the symbol, which indicates either a new literal
389 byte, or a repeated run of the most recent literal byte. First, 413 byte, or a repeated run of the most recent literal byte. First,
@@ -392,7 +416,7 @@ static int get_next_block(bunzip_data *bd)
392 if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */ 416 if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */
393 417
394 /* If this is the start of a new run, zero out counter */ 418 /* If this is the start of a new run, zero out counter */
395 if (!runPos) { 419 if (runPos == 0) {
396 runPos = 1; 420 runPos = 1;
397 t = 0; 421 t = 0;
398 } 422 }
@@ -413,13 +437,13 @@ static int get_next_block(bunzip_data *bd)
413 how many times to repeat the last literal, so append that many 437 how many times to repeat the last literal, so append that many
414 copies to our buffer of decoded symbols (dbuf) now. (The last 438 copies to our buffer of decoded symbols (dbuf) now. (The last
415 literal used is the one at the head of the mtfSymbol array.) */ 439 literal used is the one at the head of the mtfSymbol array.) */
416 if (runPos) { 440 if (runPos != 0) {
417 runPos = 0; 441 uint8_t tmp_byte;
418 if (dbufCount + t >= dbufSize) return RETVAL_DATA_ERROR; 442 if (dbufCount + t >= dbufSize) return RETVAL_DATA_ERROR;
419 443 tmp_byte = symToByte[mtfSymbol[0]];
420 uc = symToByte[mtfSymbol[0]]; 444 byteCount[tmp_byte] += t;
421 byteCount[uc] += t; 445 while (--t >= 0) dbuf[dbufCount++] = tmp_byte;
422 while (t--) dbuf[dbufCount++] = uc; 446 runPos = 0;
423 } 447 }
424 448
425 /* Is this the terminating symbol? */ 449 /* Is this the terminating symbol? */
@@ -448,12 +472,12 @@ static int get_next_block(bunzip_data *bd)
448 472
449 /* We have our literal byte. Save it into dbuf. */ 473 /* We have our literal byte. Save it into dbuf. */
450 byteCount[uc]++; 474 byteCount[uc]++;
451 dbuf[dbufCount++] = (unsigned)uc; 475 dbuf[dbufCount++] = (uint32_t)uc;
452 476
453 /* Skip group initialization if we're not done with this group. Done 477 /* Skip group initialization if we're not done with this group. Done
454 * this way to avoid compiler warning. */ 478 * this way to avoid compiler warning. */
455 end_of_huffman_loop: 479 end_of_huffman_loop:
456 if (symCount--) goto continue_this_group; 480 if (--symCount >= 0) goto continue_this_group;
457 } 481 }
458 482
459 /* At this point, we've read all the Huffman-coded symbols (and repeated 483 /* At this point, we've read all the Huffman-coded symbols (and repeated
@@ -466,16 +490,17 @@ static int get_next_block(bunzip_data *bd)
466 /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ 490 /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */
467 j = 0; 491 j = 0;
468 for (i = 0; i < 256; i++) { 492 for (i = 0; i < 256; i++) {
469 k = j + byteCount[i]; 493 int tmp_count = j + byteCount[i];
470 byteCount[i] = j; 494 byteCount[i] = j;
471 j = k; 495 j = tmp_count;
472 } 496 }
473 497
474 /* Figure out what order dbuf would be in if we sorted it. */ 498 /* Figure out what order dbuf would be in if we sorted it. */
475 for (i = 0; i < dbufCount; i++) { 499 for (i = 0; i < dbufCount; i++) {
476 uc = (uint8_t)dbuf[i]; 500 uint8_t tmp_byte = (uint8_t)dbuf[i];
477 dbuf[byteCount[uc]] |= (i << 8); 501 int tmp_count = byteCount[tmp_byte];
478 byteCount[uc]++; 502 dbuf[tmp_count] |= (i << 8);
503 byteCount[tmp_byte] = tmp_count + 1;
479 } 504 }
480 505
481 /* Decode first byte by hand to initialize "previous" byte. Note that it 506 /* Decode first byte by hand to initialize "previous" byte. Note that it