diff options
author | Denys Vlasenko <dvlasenk@redhat.com> | 2010-10-29 16:05:05 +0200 |
---|---|---|
committer | Denys Vlasenko <dvlasenk@redhat.com> | 2010-10-29 16:05:05 +0200 |
commit | 36ef0a677ee26ea007e0620227d361e14d3940ef (patch) | |
tree | 812705668445be27ed47a300e170f87344968fa9 | |
parent | fb132e47370378474c68ad22c1c0cb2ccee178de (diff) | |
download | busybox-w32-36ef0a677ee26ea007e0620227d361e14d3940ef.tar.gz busybox-w32-36ef0a677ee26ea007e0620227d361e14d3940ef.tar.bz2 busybox-w32-36ef0a677ee26ea007e0620227d361e14d3940ef.zip |
decompress_bunzip2: code shrink
function old new delta
get_next_block 1828 1827 -1
get_bits 164 156 -8
read_bunzip 304 261 -43
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/3 up/down: 0/-52) Total: -52 bytes
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
-rw-r--r-- | archival/libunarchive/decompress_bunzip2.c | 111 |
1 files changed, 67 insertions, 44 deletions
diff --git a/archival/libunarchive/decompress_bunzip2.c b/archival/libunarchive/decompress_bunzip2.c index cf6e1988f..3a5d23345 100644 --- a/archival/libunarchive/decompress_bunzip2.c +++ b/archival/libunarchive/decompress_bunzip2.c | |||
@@ -16,7 +16,7 @@ | |||
16 | function, and various other tweaks. In (limited) tests, approximately | 16 | function, and various other tweaks. In (limited) tests, approximately |
17 | 20% faster than bzcat on x86 and about 10% faster on arm. | 17 | 20% faster than bzcat on x86 and about 10% faster on arm. |
18 | 18 | ||
19 | Note that about 2/3 of the time is spent in read_unzip() reversing | 19 | Note that about 2/3 of the time is spent in read_bunzip() reversing |
20 | the Burrows-Wheeler transformation. Much of that time is delay | 20 | the Burrows-Wheeler transformation. Much of that time is delay |
21 | resulting from cache misses. | 21 | resulting from cache misses. |
22 | 22 | ||
@@ -70,23 +70,25 @@ struct bunzip_data { | |||
70 | /* I/O tracking data (file handles, buffers, positions, etc.) */ | 70 | /* I/O tracking data (file handles, buffers, positions, etc.) */ |
71 | unsigned inbufBitCount, inbufBits; | 71 | unsigned inbufBitCount, inbufBits; |
72 | int in_fd, out_fd, inbufCount, inbufPos /*, outbufPos*/; | 72 | int in_fd, out_fd, inbufCount, inbufPos /*, outbufPos*/; |
73 | unsigned char *inbuf /*,*outbuf*/; | 73 | uint8_t *inbuf /*,*outbuf*/; |
74 | 74 | ||
75 | /* State for interrupting output loop */ | 75 | /* State for interrupting output loop */ |
76 | int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent; | 76 | int writeCopies, writePos, writeRunCountdown, writeCount; |
77 | int writeCurrent; /* actually a uint8_t */ | ||
77 | 78 | ||
78 | /* The CRC values stored in the block header and calculated from the data */ | 79 | /* The CRC values stored in the block header and calculated from the data */ |
79 | uint32_t headerCRC, totalCRC, writeCRC; | 80 | uint32_t headerCRC, totalCRC, writeCRC; |
80 | 81 | ||
81 | /* Intermediate buffer and its size (in bytes) */ | 82 | /* Intermediate buffer and its size (in bytes) */ |
82 | unsigned *dbuf, dbufSize; | 83 | uint32_t *dbuf; |
84 | unsigned dbufSize; | ||
83 | 85 | ||
84 | /* For I/O error handling */ | 86 | /* For I/O error handling */ |
85 | jmp_buf jmpbuf; | 87 | jmp_buf jmpbuf; |
86 | 88 | ||
87 | /* Big things go last (register-relative addressing can be larger for big offsets) */ | 89 | /* Big things go last (register-relative addressing can be larger for big offsets) */ |
88 | uint32_t crc32Table[256]; | 90 | uint32_t crc32Table[256]; |
89 | unsigned char selectors[32768]; /* nSelectors=15 bits */ | 91 | uint8_t selectors[32768]; /* nSelectors=15 bits */ |
90 | struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ | 92 | struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ |
91 | }; | 93 | }; |
92 | /* typedef struct bunzip_data bunzip_data; -- done in .h file */ | 94 | /* typedef struct bunzip_data bunzip_data; -- done in .h file */ |
@@ -94,14 +96,15 @@ struct bunzip_data { | |||
94 | 96 | ||
95 | /* Return the next nnn bits of input. All reads from the compressed input | 97 | /* Return the next nnn bits of input. All reads from the compressed input |
96 | are done through this function. All reads are big endian */ | 98 | are done through this function. All reads are big endian */ |
97 | |||
98 | static unsigned get_bits(bunzip_data *bd, int bits_wanted) | 99 | static unsigned get_bits(bunzip_data *bd, int bits_wanted) |
99 | { | 100 | { |
100 | unsigned bits = 0; | 101 | unsigned bits = 0; |
102 | /* Cache bd->inbufBitCount in a CPU register (hopefully): */ | ||
103 | int bit_count = bd->inbufBitCount; | ||
101 | 104 | ||
102 | /* If we need to get more data from the byte buffer, do so. (Loop getting | 105 | /* If we need to get more data from the byte buffer, do so. (Loop getting |
103 | one byte at a time to enforce endianness and avoid unaligned access.) */ | 106 | one byte at a time to enforce endianness and avoid unaligned access.) */ |
104 | while ((int)(bd->inbufBitCount) < bits_wanted) { | 107 | while (bit_count < bits_wanted) { |
105 | 108 | ||
106 | /* If we need to read more data from file into byte buffer, do so */ | 109 | /* If we need to read more data from file into byte buffer, do so */ |
107 | if (bd->inbufPos == bd->inbufCount) { | 110 | if (bd->inbufPos == bd->inbufCount) { |
@@ -113,33 +116,35 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted) | |||
113 | } | 116 | } |
114 | 117 | ||
115 | /* Avoid 32-bit overflow (dump bit buffer to top of output) */ | 118 | /* Avoid 32-bit overflow (dump bit buffer to top of output) */ |
116 | if (bd->inbufBitCount >= 24) { | 119 | if (bit_count >= 24) { |
117 | bits = bd->inbufBits & ((1 << bd->inbufBitCount) - 1); | 120 | bits = bd->inbufBits & ((1 << bit_count) - 1); |
118 | bits_wanted -= bd->inbufBitCount; | 121 | bits_wanted -= bit_count; |
119 | bits <<= bits_wanted; | 122 | bits <<= bits_wanted; |
120 | bd->inbufBitCount = 0; | 123 | bit_count = 0; |
121 | } | 124 | } |
122 | 125 | ||
123 | /* Grab next 8 bits of input from buffer. */ | 126 | /* Grab next 8 bits of input from buffer. */ |
124 | bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; | 127 | bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; |
125 | bd->inbufBitCount += 8; | 128 | bit_count += 8; |
126 | } | 129 | } |
127 | 130 | ||
128 | /* Calculate result */ | 131 | /* Calculate result */ |
129 | bd->inbufBitCount -= bits_wanted; | 132 | bit_count -= bits_wanted; |
130 | bits |= (bd->inbufBits >> bd->inbufBitCount) & ((1 << bits_wanted) - 1); | 133 | bd->inbufBitCount = bit_count; |
134 | bits |= (bd->inbufBits >> bit_count) & ((1 << bits_wanted) - 1); | ||
131 | 135 | ||
132 | return bits; | 136 | return bits; |
133 | } | 137 | } |
134 | 138 | ||
135 | /* Unpacks the next block and sets up for the inverse burrows-wheeler step. */ | 139 | /* Unpacks the next block and sets up for the inverse Burrows-Wheeler step. */ |
136 | static int get_next_block(bunzip_data *bd) | 140 | static int get_next_block(bunzip_data *bd) |
137 | { | 141 | { |
138 | struct group_data *hufGroup; | 142 | struct group_data *hufGroup; |
139 | int dbufCount, nextSym, dbufSize, groupCount, *base, *limit, selector, | 143 | int dbufCount, nextSym, dbufSize, groupCount, *base, *limit, selector, |
140 | i, j, k, t, runPos, symCount, symTotal, nSelectors, byteCount[256]; | 144 | i, j, k, t, runPos, symCount, symTotal, nSelectors, byteCount[256]; |
141 | unsigned char uc, symToByte[256], mtfSymbol[256], *selectors; | 145 | uint8_t uc, symToByte[256], mtfSymbol[256], *selectors; |
142 | unsigned *dbuf, origPtr; | 146 | uint32_t *dbuf; |
147 | unsigned origPtr; | ||
143 | 148 | ||
144 | dbuf = bd->dbuf; | 149 | dbuf = bd->dbuf; |
145 | dbufSize = bd->dbufSize; | 150 | dbufSize = bd->dbufSize; |
@@ -200,7 +205,8 @@ static int get_next_block(bunzip_data *bd) | |||
200 | 205 | ||
201 | /* Decode MTF to get the next selector */ | 206 | /* Decode MTF to get the next selector */ |
202 | uc = mtfSymbol[j]; | 207 | uc = mtfSymbol[j]; |
203 | for (;j;j--) mtfSymbol[j] = mtfSymbol[j-1]; | 208 | for (; j; j--) |
209 | mtfSymbol[j] = mtfSymbol[j-1]; | ||
204 | mtfSymbol[0] = selectors[i] = uc; | 210 | mtfSymbol[0] = selectors[i] = uc; |
205 | } | 211 | } |
206 | 212 | ||
@@ -208,7 +214,7 @@ static int get_next_block(bunzip_data *bd) | |||
208 | literal symbols, plus two run symbols (RUNA, RUNB) */ | 214 | literal symbols, plus two run symbols (RUNA, RUNB) */ |
209 | symCount = symTotal + 2; | 215 | symCount = symTotal + 2; |
210 | for (j = 0; j < groupCount; j++) { | 216 | for (j = 0; j < groupCount; j++) { |
211 | unsigned char length[MAX_SYMBOLS]; | 217 | uint8_t length[MAX_SYMBOLS]; |
212 | /* 8 bits is ALMOST enough for temp[], see below */ | 218 | /* 8 bits is ALMOST enough for temp[], see below */ |
213 | unsigned temp[MAX_HUFCODE_BITS+1]; | 219 | unsigned temp[MAX_HUFCODE_BITS+1]; |
214 | int minLen, maxLen, pp; | 220 | int minLen, maxLen, pp; |
@@ -315,7 +321,7 @@ static int get_next_block(bunzip_data *bd) | |||
315 | memset(byteCount, 0, sizeof(byteCount)); /* smaller, maybe slower? */ | 321 | memset(byteCount, 0, sizeof(byteCount)); /* smaller, maybe slower? */ |
316 | for (i = 0; i < 256; i++) { | 322 | for (i = 0; i < 256; i++) { |
317 | //byteCount[i] = 0; | 323 | //byteCount[i] = 0; |
318 | mtfSymbol[i] = (unsigned char)i; | 324 | mtfSymbol[i] = (uint8_t)i; |
319 | } | 325 | } |
320 | 326 | ||
321 | /* Loop through compressed symbols. */ | 327 | /* Loop through compressed symbols. */ |
@@ -456,7 +462,7 @@ static int get_next_block(bunzip_data *bd) | |||
456 | 462 | ||
457 | /* Figure out what order dbuf would be in if we sorted it. */ | 463 | /* Figure out what order dbuf would be in if we sorted it. */ |
458 | for (i = 0; i < dbufCount; i++) { | 464 | for (i = 0; i < dbufCount; i++) { |
459 | uc = (unsigned char)(dbuf[i] & 0xff); | 465 | uc = (uint8_t)dbuf[i]; |
460 | dbuf[byteCount[uc]] |= (i << 8); | 466 | dbuf[byteCount[uc]] |= (i << 8); |
461 | byteCount[uc]++; | 467 | byteCount[uc]++; |
462 | } | 468 | } |
@@ -465,10 +471,11 @@ static int get_next_block(bunzip_data *bd) | |||
465 | doesn't get output, and if the first three characters are identical | 471 | doesn't get output, and if the first three characters are identical |
466 | it doesn't qualify as a run (hence writeRunCountdown=5). */ | 472 | it doesn't qualify as a run (hence writeRunCountdown=5). */ |
467 | if (dbufCount) { | 473 | if (dbufCount) { |
474 | uint32_t tmp; | ||
468 | if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR; | 475 | if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR; |
469 | bd->writePos = dbuf[origPtr]; | 476 | tmp = dbuf[origPtr]; |
470 | bd->writeCurrent = (unsigned char)(bd->writePos & 0xff); | 477 | bd->writeCurrent = (uint8_t)tmp; |
471 | bd->writePos >>= 8; | 478 | bd->writePos = (tmp >> 8); |
472 | bd->writeRunCountdown = 5; | 479 | bd->writeRunCountdown = 5; |
473 | } | 480 | } |
474 | bd->writeCount = dbufCount; | 481 | bd->writeCount = dbufCount; |
@@ -476,7 +483,7 @@ static int get_next_block(bunzip_data *bd) | |||
476 | return RETVAL_OK; | 483 | return RETVAL_OK; |
477 | } | 484 | } |
478 | 485 | ||
479 | /* Undo burrows-wheeler transform on intermediate buffer to produce output. | 486 | /* Undo Burrows-Wheeler transform on intermediate buffer to produce output. |
480 | If start_bunzip was initialized with out_fd=-1, then up to len bytes of | 487 | If start_bunzip was initialized with out_fd=-1, then up to len bytes of |
481 | data are written to outbuf. Return value is number of bytes written or | 488 | data are written to outbuf. Return value is number of bytes written or |
482 | error (all errors are negative numbers). If out_fd!=-1, outbuf and len | 489 | error (all errors are negative numbers). If out_fd!=-1, outbuf and len |
@@ -484,7 +491,7 @@ static int get_next_block(bunzip_data *bd) | |||
484 | */ | 491 | */ |
485 | int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | 492 | int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) |
486 | { | 493 | { |
487 | const unsigned *dbuf; | 494 | const uint32_t *dbuf; |
488 | int pos, current, previous, gotcount; | 495 | int pos, current, previous, gotcount; |
489 | 496 | ||
490 | /* If last read was short due to end of file, return last block now */ | 497 | /* If last read was short due to end of file, return last block now */ |
@@ -500,6 +507,7 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
500 | Huffman-decoded a block into the intermediate buffer yet). */ | 507 | Huffman-decoded a block into the intermediate buffer yet). */ |
501 | if (bd->writeCopies) { | 508 | if (bd->writeCopies) { |
502 | 509 | ||
510 | dec_writeCopies: | ||
503 | /* Inside the loop, writeCopies means extra copies (beyond 1) */ | 511 | /* Inside the loop, writeCopies means extra copies (beyond 1) */ |
504 | --bd->writeCopies; | 512 | --bd->writeCopies; |
505 | 513 | ||
@@ -508,10 +516,10 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
508 | 516 | ||
509 | /* If the output buffer is full, snapshot state and return */ | 517 | /* If the output buffer is full, snapshot state and return */ |
510 | if (gotcount >= len) { | 518 | if (gotcount >= len) { |
511 | bd->writePos = pos; | 519 | /* Unlikely branch. |
512 | bd->writeCurrent = current; | 520 | * Use of "goto" instead of keeping code here |
513 | bd->writeCopies++; | 521 | * helps compiler to realize this. */ |
514 | return len; | 522 | goto outbuf_full; |
515 | } | 523 | } |
516 | 524 | ||
517 | /* Write next byte into output buffer, updating CRC */ | 525 | /* Write next byte into output buffer, updating CRC */ |
@@ -521,15 +529,21 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
521 | 529 | ||
522 | /* Loop now if we're outputting multiple copies of this byte */ | 530 | /* Loop now if we're outputting multiple copies of this byte */ |
523 | if (bd->writeCopies) { | 531 | if (bd->writeCopies) { |
524 | --bd->writeCopies; | 532 | /* Unlikely branch */ |
525 | continue; | 533 | /*--bd->writeCopies;*/ |
534 | /*continue;*/ | ||
535 | /* Same, but (ab)using other existing --writeCopies operation. | ||
536 | * Luckily, this also compiles into just one branch insn: */ | ||
537 | goto dec_writeCopies; | ||
526 | } | 538 | } |
527 | decode_next_byte: | 539 | decode_next_byte: |
528 | if (!bd->writeCount--) break; | 540 | if (--bd->writeCount < 0) |
541 | break; /* input block is fully consumed, need next one */ | ||
542 | |||
529 | /* Follow sequence vector to undo Burrows-Wheeler transform */ | 543 | /* Follow sequence vector to undo Burrows-Wheeler transform */ |
530 | previous = current; | 544 | previous = current; |
531 | pos = dbuf[pos]; | 545 | pos = dbuf[pos]; |
532 | current = pos & 0xff; | 546 | current = (uint8_t)pos; |
533 | pos >>= 8; | 547 | pos >>= 8; |
534 | 548 | ||
535 | /* After 3 consecutive copies of the same byte, the 4th | 549 | /* After 3 consecutive copies of the same byte, the 4th |
@@ -539,7 +553,7 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
539 | if (current != previous) | 553 | if (current != previous) |
540 | bd->writeRunCountdown = 4; | 554 | bd->writeRunCountdown = 4; |
541 | } else { | 555 | } else { |
542 | 556 | /* Unlikely branch */ | |
543 | /* We have a repeated run, this byte indicates the count */ | 557 | /* We have a repeated run, this byte indicates the count */ |
544 | bd->writeCopies = current; | 558 | bd->writeCopies = current; |
545 | current = previous; | 559 | current = previous; |
@@ -551,9 +565,9 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
551 | /* Subtract the 1 copy we'd output anyway to get extras */ | 565 | /* Subtract the 1 copy we'd output anyway to get extras */ |
552 | --bd->writeCopies; | 566 | --bd->writeCopies; |
553 | } | 567 | } |
554 | } | 568 | } /* for(;;) */ |
555 | 569 | ||
556 | /* Decompression of this block completed successfully */ | 570 | /* Decompression of this input block completed successfully */ |
557 | bd->writeCRC = ~bd->writeCRC; | 571 | bd->writeCRC = ~bd->writeCRC; |
558 | bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC; | 572 | bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC; |
559 | 573 | ||
@@ -565,16 +579,25 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
565 | } | 579 | } |
566 | 580 | ||
567 | /* Refill the intermediate buffer by Huffman-decoding next block of input */ | 581 | /* Refill the intermediate buffer by Huffman-decoding next block of input */ |
568 | /* (previous is just a convenient unused temp variable here) */ | 582 | { |
569 | previous = get_next_block(bd); | 583 | int r = get_next_block(bd); |
570 | if (previous) { | 584 | if (r) { |
571 | bd->writeCount = previous; | 585 | bd->writeCount = r; |
572 | return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount; | 586 | return (r != RETVAL_LAST_BLOCK) ? r : gotcount; |
587 | } | ||
573 | } | 588 | } |
589 | |||
574 | bd->writeCRC = ~0; | 590 | bd->writeCRC = ~0; |
575 | pos = bd->writePos; | 591 | pos = bd->writePos; |
576 | current = bd->writeCurrent; | 592 | current = bd->writeCurrent; |
577 | goto decode_next_byte; | 593 | goto decode_next_byte; |
594 | |||
595 | outbuf_full: | ||
596 | /* Output buffer is full, snapshot state and return */ | ||
597 | bd->writePos = pos; | ||
598 | bd->writeCurrent = current; | ||
599 | bd->writeCopies++; | ||
600 | return gotcount; | ||
578 | } | 601 | } |
579 | 602 | ||
580 | /* Allocate the structure, read file header. If in_fd==-1, inbuf must contain | 603 | /* Allocate the structure, read file header. If in_fd==-1, inbuf must contain |
@@ -607,7 +630,7 @@ int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, | |||
607 | /* in this case, bd->inbuf is read-only */ | 630 | /* in this case, bd->inbuf is read-only */ |
608 | bd->inbuf = (void*)inbuf; /* cast away const-ness */ | 631 | bd->inbuf = (void*)inbuf; /* cast away const-ness */ |
609 | } else { | 632 | } else { |
610 | bd->inbuf = (unsigned char *)(bd + 1); | 633 | bd->inbuf = (uint8_t*)(bd + 1); |
611 | memcpy(bd->inbuf, inbuf, len); | 634 | memcpy(bd->inbuf, inbuf, len); |
612 | } | 635 | } |
613 | bd->inbufCount = len; | 636 | bd->inbufCount = len; |
@@ -634,7 +657,7 @@ int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, | |||
634 | bd->dbufSize = 100000 * (i - h0); | 657 | bd->dbufSize = 100000 * (i - h0); |
635 | 658 | ||
636 | /* Cannot use xmalloc - may leak bd in NOFORK case! */ | 659 | /* Cannot use xmalloc - may leak bd in NOFORK case! */ |
637 | bd->dbuf = malloc_or_warn(bd->dbufSize * sizeof(int)); | 660 | bd->dbuf = malloc_or_warn(bd->dbufSize * sizeof(bd->dbuf[0])); |
638 | if (!bd->dbuf) { | 661 | if (!bd->dbuf) { |
639 | free(bd); | 662 | free(bd); |
640 | xfunc_die(); | 663 | xfunc_die(); |