diff options
| author | Denys Vlasenko <dvlasenk@redhat.com> | 2010-10-29 16:05:05 +0200 |
|---|---|---|
| committer | Denys Vlasenko <dvlasenk@redhat.com> | 2010-10-29 16:05:05 +0200 |
| commit | 36ef0a677ee26ea007e0620227d361e14d3940ef (patch) | |
| tree | 812705668445be27ed47a300e170f87344968fa9 | |
| parent | fb132e47370378474c68ad22c1c0cb2ccee178de (diff) | |
| download | busybox-w32-36ef0a677ee26ea007e0620227d361e14d3940ef.tar.gz busybox-w32-36ef0a677ee26ea007e0620227d361e14d3940ef.tar.bz2 busybox-w32-36ef0a677ee26ea007e0620227d361e14d3940ef.zip | |
decompress_bunzip2: code shrink
function old new delta
get_next_block 1828 1827 -1
get_bits 164 156 -8
read_bunzip 304 261 -43
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/3 up/down: 0/-52) Total: -52 bytes
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
| -rw-r--r-- | archival/libunarchive/decompress_bunzip2.c | 111 |
1 files changed, 67 insertions, 44 deletions
diff --git a/archival/libunarchive/decompress_bunzip2.c b/archival/libunarchive/decompress_bunzip2.c index cf6e1988f..3a5d23345 100644 --- a/archival/libunarchive/decompress_bunzip2.c +++ b/archival/libunarchive/decompress_bunzip2.c | |||
| @@ -16,7 +16,7 @@ | |||
| 16 | function, and various other tweaks. In (limited) tests, approximately | 16 | function, and various other tweaks. In (limited) tests, approximately |
| 17 | 20% faster than bzcat on x86 and about 10% faster on arm. | 17 | 20% faster than bzcat on x86 and about 10% faster on arm. |
| 18 | 18 | ||
| 19 | Note that about 2/3 of the time is spent in read_unzip() reversing | 19 | Note that about 2/3 of the time is spent in read_bunzip() reversing |
| 20 | the Burrows-Wheeler transformation. Much of that time is delay | 20 | the Burrows-Wheeler transformation. Much of that time is delay |
| 21 | resulting from cache misses. | 21 | resulting from cache misses. |
| 22 | 22 | ||
| @@ -70,23 +70,25 @@ struct bunzip_data { | |||
| 70 | /* I/O tracking data (file handles, buffers, positions, etc.) */ | 70 | /* I/O tracking data (file handles, buffers, positions, etc.) */ |
| 71 | unsigned inbufBitCount, inbufBits; | 71 | unsigned inbufBitCount, inbufBits; |
| 72 | int in_fd, out_fd, inbufCount, inbufPos /*, outbufPos*/; | 72 | int in_fd, out_fd, inbufCount, inbufPos /*, outbufPos*/; |
| 73 | unsigned char *inbuf /*,*outbuf*/; | 73 | uint8_t *inbuf /*,*outbuf*/; |
| 74 | 74 | ||
| 75 | /* State for interrupting output loop */ | 75 | /* State for interrupting output loop */ |
| 76 | int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent; | 76 | int writeCopies, writePos, writeRunCountdown, writeCount; |
| 77 | int writeCurrent; /* actually a uint8_t */ | ||
| 77 | 78 | ||
| 78 | /* The CRC values stored in the block header and calculated from the data */ | 79 | /* The CRC values stored in the block header and calculated from the data */ |
| 79 | uint32_t headerCRC, totalCRC, writeCRC; | 80 | uint32_t headerCRC, totalCRC, writeCRC; |
| 80 | 81 | ||
| 81 | /* Intermediate buffer and its size (in bytes) */ | 82 | /* Intermediate buffer and its size (in bytes) */ |
| 82 | unsigned *dbuf, dbufSize; | 83 | uint32_t *dbuf; |
| 84 | unsigned dbufSize; | ||
| 83 | 85 | ||
| 84 | /* For I/O error handling */ | 86 | /* For I/O error handling */ |
| 85 | jmp_buf jmpbuf; | 87 | jmp_buf jmpbuf; |
| 86 | 88 | ||
| 87 | /* Big things go last (register-relative addressing can be larger for big offsets) */ | 89 | /* Big things go last (register-relative addressing can be larger for big offsets) */ |
| 88 | uint32_t crc32Table[256]; | 90 | uint32_t crc32Table[256]; |
| 89 | unsigned char selectors[32768]; /* nSelectors=15 bits */ | 91 | uint8_t selectors[32768]; /* nSelectors=15 bits */ |
| 90 | struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ | 92 | struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ |
| 91 | }; | 93 | }; |
| 92 | /* typedef struct bunzip_data bunzip_data; -- done in .h file */ | 94 | /* typedef struct bunzip_data bunzip_data; -- done in .h file */ |
| @@ -94,14 +96,15 @@ struct bunzip_data { | |||
| 94 | 96 | ||
| 95 | /* Return the next nnn bits of input. All reads from the compressed input | 97 | /* Return the next nnn bits of input. All reads from the compressed input |
| 96 | are done through this function. All reads are big endian */ | 98 | are done through this function. All reads are big endian */ |
| 97 | |||
| 98 | static unsigned get_bits(bunzip_data *bd, int bits_wanted) | 99 | static unsigned get_bits(bunzip_data *bd, int bits_wanted) |
| 99 | { | 100 | { |
| 100 | unsigned bits = 0; | 101 | unsigned bits = 0; |
| 102 | /* Cache bd->inbufBitCount in a CPU register (hopefully): */ | ||
| 103 | int bit_count = bd->inbufBitCount; | ||
| 101 | 104 | ||
| 102 | /* If we need to get more data from the byte buffer, do so. (Loop getting | 105 | /* If we need to get more data from the byte buffer, do so. (Loop getting |
| 103 | one byte at a time to enforce endianness and avoid unaligned access.) */ | 106 | one byte at a time to enforce endianness and avoid unaligned access.) */ |
| 104 | while ((int)(bd->inbufBitCount) < bits_wanted) { | 107 | while (bit_count < bits_wanted) { |
| 105 | 108 | ||
| 106 | /* If we need to read more data from file into byte buffer, do so */ | 109 | /* If we need to read more data from file into byte buffer, do so */ |
| 107 | if (bd->inbufPos == bd->inbufCount) { | 110 | if (bd->inbufPos == bd->inbufCount) { |
| @@ -113,33 +116,35 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted) | |||
| 113 | } | 116 | } |
| 114 | 117 | ||
| 115 | /* Avoid 32-bit overflow (dump bit buffer to top of output) */ | 118 | /* Avoid 32-bit overflow (dump bit buffer to top of output) */ |
| 116 | if (bd->inbufBitCount >= 24) { | 119 | if (bit_count >= 24) { |
| 117 | bits = bd->inbufBits & ((1 << bd->inbufBitCount) - 1); | 120 | bits = bd->inbufBits & ((1 << bit_count) - 1); |
| 118 | bits_wanted -= bd->inbufBitCount; | 121 | bits_wanted -= bit_count; |
| 119 | bits <<= bits_wanted; | 122 | bits <<= bits_wanted; |
| 120 | bd->inbufBitCount = 0; | 123 | bit_count = 0; |
| 121 | } | 124 | } |
| 122 | 125 | ||
| 123 | /* Grab next 8 bits of input from buffer. */ | 126 | /* Grab next 8 bits of input from buffer. */ |
| 124 | bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; | 127 | bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; |
| 125 | bd->inbufBitCount += 8; | 128 | bit_count += 8; |
| 126 | } | 129 | } |
| 127 | 130 | ||
| 128 | /* Calculate result */ | 131 | /* Calculate result */ |
| 129 | bd->inbufBitCount -= bits_wanted; | 132 | bit_count -= bits_wanted; |
| 130 | bits |= (bd->inbufBits >> bd->inbufBitCount) & ((1 << bits_wanted) - 1); | 133 | bd->inbufBitCount = bit_count; |
| 134 | bits |= (bd->inbufBits >> bit_count) & ((1 << bits_wanted) - 1); | ||
| 131 | 135 | ||
| 132 | return bits; | 136 | return bits; |
| 133 | } | 137 | } |
| 134 | 138 | ||
| 135 | /* Unpacks the next block and sets up for the inverse burrows-wheeler step. */ | 139 | /* Unpacks the next block and sets up for the inverse Burrows-Wheeler step. */ |
| 136 | static int get_next_block(bunzip_data *bd) | 140 | static int get_next_block(bunzip_data *bd) |
| 137 | { | 141 | { |
| 138 | struct group_data *hufGroup; | 142 | struct group_data *hufGroup; |
| 139 | int dbufCount, nextSym, dbufSize, groupCount, *base, *limit, selector, | 143 | int dbufCount, nextSym, dbufSize, groupCount, *base, *limit, selector, |
| 140 | i, j, k, t, runPos, symCount, symTotal, nSelectors, byteCount[256]; | 144 | i, j, k, t, runPos, symCount, symTotal, nSelectors, byteCount[256]; |
| 141 | unsigned char uc, symToByte[256], mtfSymbol[256], *selectors; | 145 | uint8_t uc, symToByte[256], mtfSymbol[256], *selectors; |
| 142 | unsigned *dbuf, origPtr; | 146 | uint32_t *dbuf; |
| 147 | unsigned origPtr; | ||
| 143 | 148 | ||
| 144 | dbuf = bd->dbuf; | 149 | dbuf = bd->dbuf; |
| 145 | dbufSize = bd->dbufSize; | 150 | dbufSize = bd->dbufSize; |
| @@ -200,7 +205,8 @@ static int get_next_block(bunzip_data *bd) | |||
| 200 | 205 | ||
| 201 | /* Decode MTF to get the next selector */ | 206 | /* Decode MTF to get the next selector */ |
| 202 | uc = mtfSymbol[j]; | 207 | uc = mtfSymbol[j]; |
| 203 | for (;j;j--) mtfSymbol[j] = mtfSymbol[j-1]; | 208 | for (; j; j--) |
| 209 | mtfSymbol[j] = mtfSymbol[j-1]; | ||
| 204 | mtfSymbol[0] = selectors[i] = uc; | 210 | mtfSymbol[0] = selectors[i] = uc; |
| 205 | } | 211 | } |
| 206 | 212 | ||
| @@ -208,7 +214,7 @@ static int get_next_block(bunzip_data *bd) | |||
| 208 | literal symbols, plus two run symbols (RUNA, RUNB) */ | 214 | literal symbols, plus two run symbols (RUNA, RUNB) */ |
| 209 | symCount = symTotal + 2; | 215 | symCount = symTotal + 2; |
| 210 | for (j = 0; j < groupCount; j++) { | 216 | for (j = 0; j < groupCount; j++) { |
| 211 | unsigned char length[MAX_SYMBOLS]; | 217 | uint8_t length[MAX_SYMBOLS]; |
| 212 | /* 8 bits is ALMOST enough for temp[], see below */ | 218 | /* 8 bits is ALMOST enough for temp[], see below */ |
| 213 | unsigned temp[MAX_HUFCODE_BITS+1]; | 219 | unsigned temp[MAX_HUFCODE_BITS+1]; |
| 214 | int minLen, maxLen, pp; | 220 | int minLen, maxLen, pp; |
| @@ -315,7 +321,7 @@ static int get_next_block(bunzip_data *bd) | |||
| 315 | memset(byteCount, 0, sizeof(byteCount)); /* smaller, maybe slower? */ | 321 | memset(byteCount, 0, sizeof(byteCount)); /* smaller, maybe slower? */ |
| 316 | for (i = 0; i < 256; i++) { | 322 | for (i = 0; i < 256; i++) { |
| 317 | //byteCount[i] = 0; | 323 | //byteCount[i] = 0; |
| 318 | mtfSymbol[i] = (unsigned char)i; | 324 | mtfSymbol[i] = (uint8_t)i; |
| 319 | } | 325 | } |
| 320 | 326 | ||
| 321 | /* Loop through compressed symbols. */ | 327 | /* Loop through compressed symbols. */ |
| @@ -456,7 +462,7 @@ static int get_next_block(bunzip_data *bd) | |||
| 456 | 462 | ||
| 457 | /* Figure out what order dbuf would be in if we sorted it. */ | 463 | /* Figure out what order dbuf would be in if we sorted it. */ |
| 458 | for (i = 0; i < dbufCount; i++) { | 464 | for (i = 0; i < dbufCount; i++) { |
| 459 | uc = (unsigned char)(dbuf[i] & 0xff); | 465 | uc = (uint8_t)dbuf[i]; |
| 460 | dbuf[byteCount[uc]] |= (i << 8); | 466 | dbuf[byteCount[uc]] |= (i << 8); |
| 461 | byteCount[uc]++; | 467 | byteCount[uc]++; |
| 462 | } | 468 | } |
| @@ -465,10 +471,11 @@ static int get_next_block(bunzip_data *bd) | |||
| 465 | doesn't get output, and if the first three characters are identical | 471 | doesn't get output, and if the first three characters are identical |
| 466 | it doesn't qualify as a run (hence writeRunCountdown=5). */ | 472 | it doesn't qualify as a run (hence writeRunCountdown=5). */ |
| 467 | if (dbufCount) { | 473 | if (dbufCount) { |
| 474 | uint32_t tmp; | ||
| 468 | if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR; | 475 | if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR; |
| 469 | bd->writePos = dbuf[origPtr]; | 476 | tmp = dbuf[origPtr]; |
| 470 | bd->writeCurrent = (unsigned char)(bd->writePos & 0xff); | 477 | bd->writeCurrent = (uint8_t)tmp; |
| 471 | bd->writePos >>= 8; | 478 | bd->writePos = (tmp >> 8); |
| 472 | bd->writeRunCountdown = 5; | 479 | bd->writeRunCountdown = 5; |
| 473 | } | 480 | } |
| 474 | bd->writeCount = dbufCount; | 481 | bd->writeCount = dbufCount; |
| @@ -476,7 +483,7 @@ static int get_next_block(bunzip_data *bd) | |||
| 476 | return RETVAL_OK; | 483 | return RETVAL_OK; |
| 477 | } | 484 | } |
| 478 | 485 | ||
| 479 | /* Undo burrows-wheeler transform on intermediate buffer to produce output. | 486 | /* Undo Burrows-Wheeler transform on intermediate buffer to produce output. |
| 480 | If start_bunzip was initialized with out_fd=-1, then up to len bytes of | 487 | If start_bunzip was initialized with out_fd=-1, then up to len bytes of |
| 481 | data are written to outbuf. Return value is number of bytes written or | 488 | data are written to outbuf. Return value is number of bytes written or |
| 482 | error (all errors are negative numbers). If out_fd!=-1, outbuf and len | 489 | error (all errors are negative numbers). If out_fd!=-1, outbuf and len |
| @@ -484,7 +491,7 @@ static int get_next_block(bunzip_data *bd) | |||
| 484 | */ | 491 | */ |
| 485 | int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | 492 | int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) |
| 486 | { | 493 | { |
| 487 | const unsigned *dbuf; | 494 | const uint32_t *dbuf; |
| 488 | int pos, current, previous, gotcount; | 495 | int pos, current, previous, gotcount; |
| 489 | 496 | ||
| 490 | /* If last read was short due to end of file, return last block now */ | 497 | /* If last read was short due to end of file, return last block now */ |
| @@ -500,6 +507,7 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
| 500 | Huffman-decoded a block into the intermediate buffer yet). */ | 507 | Huffman-decoded a block into the intermediate buffer yet). */ |
| 501 | if (bd->writeCopies) { | 508 | if (bd->writeCopies) { |
| 502 | 509 | ||
| 510 | dec_writeCopies: | ||
| 503 | /* Inside the loop, writeCopies means extra copies (beyond 1) */ | 511 | /* Inside the loop, writeCopies means extra copies (beyond 1) */ |
| 504 | --bd->writeCopies; | 512 | --bd->writeCopies; |
| 505 | 513 | ||
| @@ -508,10 +516,10 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
| 508 | 516 | ||
| 509 | /* If the output buffer is full, snapshot state and return */ | 517 | /* If the output buffer is full, snapshot state and return */ |
| 510 | if (gotcount >= len) { | 518 | if (gotcount >= len) { |
| 511 | bd->writePos = pos; | 519 | /* Unlikely branch. |
| 512 | bd->writeCurrent = current; | 520 | * Use of "goto" instead of keeping code here |
| 513 | bd->writeCopies++; | 521 | * helps compiler to realize this. */ |
| 514 | return len; | 522 | goto outbuf_full; |
| 515 | } | 523 | } |
| 516 | 524 | ||
| 517 | /* Write next byte into output buffer, updating CRC */ | 525 | /* Write next byte into output buffer, updating CRC */ |
| @@ -521,15 +529,21 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
| 521 | 529 | ||
| 522 | /* Loop now if we're outputting multiple copies of this byte */ | 530 | /* Loop now if we're outputting multiple copies of this byte */ |
| 523 | if (bd->writeCopies) { | 531 | if (bd->writeCopies) { |
| 524 | --bd->writeCopies; | 532 | /* Unlikely branch */ |
| 525 | continue; | 533 | /*--bd->writeCopies;*/ |
| 534 | /*continue;*/ | ||
| 535 | /* Same, but (ab)using other existing --writeCopies operation. | ||
| 536 | * Luckily, this also compiles into just one branch insn: */ | ||
| 537 | goto dec_writeCopies; | ||
| 526 | } | 538 | } |
| 527 | decode_next_byte: | 539 | decode_next_byte: |
| 528 | if (!bd->writeCount--) break; | 540 | if (--bd->writeCount < 0) |
| 541 | break; /* input block is fully consumed, need next one */ | ||
| 542 | |||
| 529 | /* Follow sequence vector to undo Burrows-Wheeler transform */ | 543 | /* Follow sequence vector to undo Burrows-Wheeler transform */ |
| 530 | previous = current; | 544 | previous = current; |
| 531 | pos = dbuf[pos]; | 545 | pos = dbuf[pos]; |
| 532 | current = pos & 0xff; | 546 | current = (uint8_t)pos; |
| 533 | pos >>= 8; | 547 | pos >>= 8; |
| 534 | 548 | ||
| 535 | /* After 3 consecutive copies of the same byte, the 4th | 549 | /* After 3 consecutive copies of the same byte, the 4th |
| @@ -539,7 +553,7 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
| 539 | if (current != previous) | 553 | if (current != previous) |
| 540 | bd->writeRunCountdown = 4; | 554 | bd->writeRunCountdown = 4; |
| 541 | } else { | 555 | } else { |
| 542 | 556 | /* Unlikely branch */ | |
| 543 | /* We have a repeated run, this byte indicates the count */ | 557 | /* We have a repeated run, this byte indicates the count */ |
| 544 | bd->writeCopies = current; | 558 | bd->writeCopies = current; |
| 545 | current = previous; | 559 | current = previous; |
| @@ -551,9 +565,9 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
| 551 | /* Subtract the 1 copy we'd output anyway to get extras */ | 565 | /* Subtract the 1 copy we'd output anyway to get extras */ |
| 552 | --bd->writeCopies; | 566 | --bd->writeCopies; |
| 553 | } | 567 | } |
| 554 | } | 568 | } /* for(;;) */ |
| 555 | 569 | ||
| 556 | /* Decompression of this block completed successfully */ | 570 | /* Decompression of this input block completed successfully */ |
| 557 | bd->writeCRC = ~bd->writeCRC; | 571 | bd->writeCRC = ~bd->writeCRC; |
| 558 | bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC; | 572 | bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC; |
| 559 | 573 | ||
| @@ -565,16 +579,25 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
| 565 | } | 579 | } |
| 566 | 580 | ||
| 567 | /* Refill the intermediate buffer by Huffman-decoding next block of input */ | 581 | /* Refill the intermediate buffer by Huffman-decoding next block of input */ |
| 568 | /* (previous is just a convenient unused temp variable here) */ | 582 | { |
| 569 | previous = get_next_block(bd); | 583 | int r = get_next_block(bd); |
| 570 | if (previous) { | 584 | if (r) { |
| 571 | bd->writeCount = previous; | 585 | bd->writeCount = r; |
| 572 | return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount; | 586 | return (r != RETVAL_LAST_BLOCK) ? r : gotcount; |
| 587 | } | ||
| 573 | } | 588 | } |
| 589 | |||
| 574 | bd->writeCRC = ~0; | 590 | bd->writeCRC = ~0; |
| 575 | pos = bd->writePos; | 591 | pos = bd->writePos; |
| 576 | current = bd->writeCurrent; | 592 | current = bd->writeCurrent; |
| 577 | goto decode_next_byte; | 593 | goto decode_next_byte; |
| 594 | |||
| 595 | outbuf_full: | ||
| 596 | /* Output buffer is full, snapshot state and return */ | ||
| 597 | bd->writePos = pos; | ||
| 598 | bd->writeCurrent = current; | ||
| 599 | bd->writeCopies++; | ||
| 600 | return gotcount; | ||
| 578 | } | 601 | } |
| 579 | 602 | ||
| 580 | /* Allocate the structure, read file header. If in_fd==-1, inbuf must contain | 603 | /* Allocate the structure, read file header. If in_fd==-1, inbuf must contain |
| @@ -607,7 +630,7 @@ int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, | |||
| 607 | /* in this case, bd->inbuf is read-only */ | 630 | /* in this case, bd->inbuf is read-only */ |
| 608 | bd->inbuf = (void*)inbuf; /* cast away const-ness */ | 631 | bd->inbuf = (void*)inbuf; /* cast away const-ness */ |
| 609 | } else { | 632 | } else { |
| 610 | bd->inbuf = (unsigned char *)(bd + 1); | 633 | bd->inbuf = (uint8_t*)(bd + 1); |
| 611 | memcpy(bd->inbuf, inbuf, len); | 634 | memcpy(bd->inbuf, inbuf, len); |
| 612 | } | 635 | } |
| 613 | bd->inbufCount = len; | 636 | bd->inbufCount = len; |
| @@ -634,7 +657,7 @@ int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, | |||
| 634 | bd->dbufSize = 100000 * (i - h0); | 657 | bd->dbufSize = 100000 * (i - h0); |
| 635 | 658 | ||
| 636 | /* Cannot use xmalloc - may leak bd in NOFORK case! */ | 659 | /* Cannot use xmalloc - may leak bd in NOFORK case! */ |
| 637 | bd->dbuf = malloc_or_warn(bd->dbufSize * sizeof(int)); | 660 | bd->dbuf = malloc_or_warn(bd->dbufSize * sizeof(bd->dbuf[0])); |
| 638 | if (!bd->dbuf) { | 661 | if (!bd->dbuf) { |
| 639 | free(bd); | 662 | free(bd); |
| 640 | xfunc_die(); | 663 | xfunc_die(); |
