aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenys Vlasenko <dvlasenk@redhat.com>2010-10-29 16:05:05 +0200
committerDenys Vlasenko <dvlasenk@redhat.com>2010-10-29 16:05:05 +0200
commit36ef0a677ee26ea007e0620227d361e14d3940ef (patch)
tree812705668445be27ed47a300e170f87344968fa9
parentfb132e47370378474c68ad22c1c0cb2ccee178de (diff)
downloadbusybox-w32-36ef0a677ee26ea007e0620227d361e14d3940ef.tar.gz
busybox-w32-36ef0a677ee26ea007e0620227d361e14d3940ef.tar.bz2
busybox-w32-36ef0a677ee26ea007e0620227d361e14d3940ef.zip
decompress_bunzip2: code shrink
function old new delta get_next_block 1828 1827 -1 get_bits 164 156 -8 read_bunzip 304 261 -43 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/3 up/down: 0/-52) Total: -52 bytes Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
-rw-r--r--archival/libunarchive/decompress_bunzip2.c111
1 files changed, 67 insertions, 44 deletions
diff --git a/archival/libunarchive/decompress_bunzip2.c b/archival/libunarchive/decompress_bunzip2.c
index cf6e1988f..3a5d23345 100644
--- a/archival/libunarchive/decompress_bunzip2.c
+++ b/archival/libunarchive/decompress_bunzip2.c
@@ -16,7 +16,7 @@
16 function, and various other tweaks. In (limited) tests, approximately 16 function, and various other tweaks. In (limited) tests, approximately
17 20% faster than bzcat on x86 and about 10% faster on arm. 17 20% faster than bzcat on x86 and about 10% faster on arm.
18 18
19 Note that about 2/3 of the time is spent in read_unzip() reversing 19 Note that about 2/3 of the time is spent in read_bunzip() reversing
20 the Burrows-Wheeler transformation. Much of that time is delay 20 the Burrows-Wheeler transformation. Much of that time is delay
21 resulting from cache misses. 21 resulting from cache misses.
22 22
@@ -70,23 +70,25 @@ struct bunzip_data {
70 /* I/O tracking data (file handles, buffers, positions, etc.) */ 70 /* I/O tracking data (file handles, buffers, positions, etc.) */
71 unsigned inbufBitCount, inbufBits; 71 unsigned inbufBitCount, inbufBits;
72 int in_fd, out_fd, inbufCount, inbufPos /*, outbufPos*/; 72 int in_fd, out_fd, inbufCount, inbufPos /*, outbufPos*/;
73 unsigned char *inbuf /*,*outbuf*/; 73 uint8_t *inbuf /*,*outbuf*/;
74 74
75 /* State for interrupting output loop */ 75 /* State for interrupting output loop */
76 int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent; 76 int writeCopies, writePos, writeRunCountdown, writeCount;
77 int writeCurrent; /* actually a uint8_t */
77 78
78 /* The CRC values stored in the block header and calculated from the data */ 79 /* The CRC values stored in the block header and calculated from the data */
79 uint32_t headerCRC, totalCRC, writeCRC; 80 uint32_t headerCRC, totalCRC, writeCRC;
80 81
81 /* Intermediate buffer and its size (in bytes) */ 82 /* Intermediate buffer and its size (in bytes) */
82 unsigned *dbuf, dbufSize; 83 uint32_t *dbuf;
84 unsigned dbufSize;
83 85
84 /* For I/O error handling */ 86 /* For I/O error handling */
85 jmp_buf jmpbuf; 87 jmp_buf jmpbuf;
86 88
87 /* Big things go last (register-relative addressing can be larger for big offsets) */ 89 /* Big things go last (register-relative addressing can be larger for big offsets) */
88 uint32_t crc32Table[256]; 90 uint32_t crc32Table[256];
89 unsigned char selectors[32768]; /* nSelectors=15 bits */ 91 uint8_t selectors[32768]; /* nSelectors=15 bits */
90 struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ 92 struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */
91}; 93};
92/* typedef struct bunzip_data bunzip_data; -- done in .h file */ 94/* typedef struct bunzip_data bunzip_data; -- done in .h file */
@@ -94,14 +96,15 @@ struct bunzip_data {
94 96
95/* Return the next nnn bits of input. All reads from the compressed input 97/* Return the next nnn bits of input. All reads from the compressed input
96 are done through this function. All reads are big endian */ 98 are done through this function. All reads are big endian */
97
98static unsigned get_bits(bunzip_data *bd, int bits_wanted) 99static unsigned get_bits(bunzip_data *bd, int bits_wanted)
99{ 100{
100 unsigned bits = 0; 101 unsigned bits = 0;
102 /* Cache bd->inbufBitCount in a CPU register (hopefully): */
103 int bit_count = bd->inbufBitCount;
101 104
102 /* If we need to get more data from the byte buffer, do so. (Loop getting 105 /* If we need to get more data from the byte buffer, do so. (Loop getting
103 one byte at a time to enforce endianness and avoid unaligned access.) */ 106 one byte at a time to enforce endianness and avoid unaligned access.) */
104 while ((int)(bd->inbufBitCount) < bits_wanted) { 107 while (bit_count < bits_wanted) {
105 108
106 /* If we need to read more data from file into byte buffer, do so */ 109 /* If we need to read more data from file into byte buffer, do so */
107 if (bd->inbufPos == bd->inbufCount) { 110 if (bd->inbufPos == bd->inbufCount) {
@@ -113,33 +116,35 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted)
113 } 116 }
114 117
115 /* Avoid 32-bit overflow (dump bit buffer to top of output) */ 118 /* Avoid 32-bit overflow (dump bit buffer to top of output) */
116 if (bd->inbufBitCount >= 24) { 119 if (bit_count >= 24) {
117 bits = bd->inbufBits & ((1 << bd->inbufBitCount) - 1); 120 bits = bd->inbufBits & ((1 << bit_count) - 1);
118 bits_wanted -= bd->inbufBitCount; 121 bits_wanted -= bit_count;
119 bits <<= bits_wanted; 122 bits <<= bits_wanted;
120 bd->inbufBitCount = 0; 123 bit_count = 0;
121 } 124 }
122 125
123 /* Grab next 8 bits of input from buffer. */ 126 /* Grab next 8 bits of input from buffer. */
124 bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; 127 bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++];
125 bd->inbufBitCount += 8; 128 bit_count += 8;
126 } 129 }
127 130
128 /* Calculate result */ 131 /* Calculate result */
129 bd->inbufBitCount -= bits_wanted; 132 bit_count -= bits_wanted;
130 bits |= (bd->inbufBits >> bd->inbufBitCount) & ((1 << bits_wanted) - 1); 133 bd->inbufBitCount = bit_count;
134 bits |= (bd->inbufBits >> bit_count) & ((1 << bits_wanted) - 1);
131 135
132 return bits; 136 return bits;
133} 137}
134 138
135/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */ 139/* Unpacks the next block and sets up for the inverse Burrows-Wheeler step. */
136static int get_next_block(bunzip_data *bd) 140static int get_next_block(bunzip_data *bd)
137{ 141{
138 struct group_data *hufGroup; 142 struct group_data *hufGroup;
139 int dbufCount, nextSym, dbufSize, groupCount, *base, *limit, selector, 143 int dbufCount, nextSym, dbufSize, groupCount, *base, *limit, selector,
140 i, j, k, t, runPos, symCount, symTotal, nSelectors, byteCount[256]; 144 i, j, k, t, runPos, symCount, symTotal, nSelectors, byteCount[256];
141 unsigned char uc, symToByte[256], mtfSymbol[256], *selectors; 145 uint8_t uc, symToByte[256], mtfSymbol[256], *selectors;
142 unsigned *dbuf, origPtr; 146 uint32_t *dbuf;
147 unsigned origPtr;
143 148
144 dbuf = bd->dbuf; 149 dbuf = bd->dbuf;
145 dbufSize = bd->dbufSize; 150 dbufSize = bd->dbufSize;
@@ -200,7 +205,8 @@ static int get_next_block(bunzip_data *bd)
200 205
201 /* Decode MTF to get the next selector */ 206 /* Decode MTF to get the next selector */
202 uc = mtfSymbol[j]; 207 uc = mtfSymbol[j];
203 for (;j;j--) mtfSymbol[j] = mtfSymbol[j-1]; 208 for (; j; j--)
209 mtfSymbol[j] = mtfSymbol[j-1];
204 mtfSymbol[0] = selectors[i] = uc; 210 mtfSymbol[0] = selectors[i] = uc;
205 } 211 }
206 212
@@ -208,7 +214,7 @@ static int get_next_block(bunzip_data *bd)
208 literal symbols, plus two run symbols (RUNA, RUNB) */ 214 literal symbols, plus two run symbols (RUNA, RUNB) */
209 symCount = symTotal + 2; 215 symCount = symTotal + 2;
210 for (j = 0; j < groupCount; j++) { 216 for (j = 0; j < groupCount; j++) {
211 unsigned char length[MAX_SYMBOLS]; 217 uint8_t length[MAX_SYMBOLS];
212 /* 8 bits is ALMOST enough for temp[], see below */ 218 /* 8 bits is ALMOST enough for temp[], see below */
213 unsigned temp[MAX_HUFCODE_BITS+1]; 219 unsigned temp[MAX_HUFCODE_BITS+1];
214 int minLen, maxLen, pp; 220 int minLen, maxLen, pp;
@@ -315,7 +321,7 @@ static int get_next_block(bunzip_data *bd)
315 memset(byteCount, 0, sizeof(byteCount)); /* smaller, maybe slower? */ 321 memset(byteCount, 0, sizeof(byteCount)); /* smaller, maybe slower? */
316 for (i = 0; i < 256; i++) { 322 for (i = 0; i < 256; i++) {
317 //byteCount[i] = 0; 323 //byteCount[i] = 0;
318 mtfSymbol[i] = (unsigned char)i; 324 mtfSymbol[i] = (uint8_t)i;
319 } 325 }
320 326
321 /* Loop through compressed symbols. */ 327 /* Loop through compressed symbols. */
@@ -456,7 +462,7 @@ static int get_next_block(bunzip_data *bd)
456 462
457 /* Figure out what order dbuf would be in if we sorted it. */ 463 /* Figure out what order dbuf would be in if we sorted it. */
458 for (i = 0; i < dbufCount; i++) { 464 for (i = 0; i < dbufCount; i++) {
459 uc = (unsigned char)(dbuf[i] & 0xff); 465 uc = (uint8_t)dbuf[i];
460 dbuf[byteCount[uc]] |= (i << 8); 466 dbuf[byteCount[uc]] |= (i << 8);
461 byteCount[uc]++; 467 byteCount[uc]++;
462 } 468 }
@@ -465,10 +471,11 @@ static int get_next_block(bunzip_data *bd)
465 doesn't get output, and if the first three characters are identical 471 doesn't get output, and if the first three characters are identical
466 it doesn't qualify as a run (hence writeRunCountdown=5). */ 472 it doesn't qualify as a run (hence writeRunCountdown=5). */
467 if (dbufCount) { 473 if (dbufCount) {
474 uint32_t tmp;
468 if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR; 475 if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR;
469 bd->writePos = dbuf[origPtr]; 476 tmp = dbuf[origPtr];
470 bd->writeCurrent = (unsigned char)(bd->writePos & 0xff); 477 bd->writeCurrent = (uint8_t)tmp;
471 bd->writePos >>= 8; 478 bd->writePos = (tmp >> 8);
472 bd->writeRunCountdown = 5; 479 bd->writeRunCountdown = 5;
473 } 480 }
474 bd->writeCount = dbufCount; 481 bd->writeCount = dbufCount;
@@ -476,7 +483,7 @@ static int get_next_block(bunzip_data *bd)
476 return RETVAL_OK; 483 return RETVAL_OK;
477} 484}
478 485
479/* Undo burrows-wheeler transform on intermediate buffer to produce output. 486/* Undo Burrows-Wheeler transform on intermediate buffer to produce output.
480 If start_bunzip was initialized with out_fd=-1, then up to len bytes of 487 If start_bunzip was initialized with out_fd=-1, then up to len bytes of
481 data are written to outbuf. Return value is number of bytes written or 488 data are written to outbuf. Return value is number of bytes written or
482 error (all errors are negative numbers). If out_fd!=-1, outbuf and len 489 error (all errors are negative numbers). If out_fd!=-1, outbuf and len
@@ -484,7 +491,7 @@ static int get_next_block(bunzip_data *bd)
484*/ 491*/
485int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) 492int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
486{ 493{
487 const unsigned *dbuf; 494 const uint32_t *dbuf;
488 int pos, current, previous, gotcount; 495 int pos, current, previous, gotcount;
489 496
490 /* If last read was short due to end of file, return last block now */ 497 /* If last read was short due to end of file, return last block now */
@@ -500,6 +507,7 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
500 Huffman-decoded a block into the intermediate buffer yet). */ 507 Huffman-decoded a block into the intermediate buffer yet). */
501 if (bd->writeCopies) { 508 if (bd->writeCopies) {
502 509
510 dec_writeCopies:
503 /* Inside the loop, writeCopies means extra copies (beyond 1) */ 511 /* Inside the loop, writeCopies means extra copies (beyond 1) */
504 --bd->writeCopies; 512 --bd->writeCopies;
505 513
@@ -508,10 +516,10 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
508 516
509 /* If the output buffer is full, snapshot state and return */ 517 /* If the output buffer is full, snapshot state and return */
510 if (gotcount >= len) { 518 if (gotcount >= len) {
511 bd->writePos = pos; 519 /* Unlikely branch.
512 bd->writeCurrent = current; 520 * Use of "goto" instead of keeping code here
513 bd->writeCopies++; 521 * helps compiler to realize this. */
514 return len; 522 goto outbuf_full;
515 } 523 }
516 524
517 /* Write next byte into output buffer, updating CRC */ 525 /* Write next byte into output buffer, updating CRC */
@@ -521,15 +529,21 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
521 529
522 /* Loop now if we're outputting multiple copies of this byte */ 530 /* Loop now if we're outputting multiple copies of this byte */
523 if (bd->writeCopies) { 531 if (bd->writeCopies) {
524 --bd->writeCopies; 532 /* Unlikely branch */
525 continue; 533 /*--bd->writeCopies;*/
534 /*continue;*/
535 /* Same, but (ab)using other existing --writeCopies operation.
536 * Luckily, this also compiles into just one branch insn: */
537 goto dec_writeCopies;
526 } 538 }
527 decode_next_byte: 539 decode_next_byte:
528 if (!bd->writeCount--) break; 540 if (--bd->writeCount < 0)
541 break; /* input block is fully consumed, need next one */
542
529 /* Follow sequence vector to undo Burrows-Wheeler transform */ 543 /* Follow sequence vector to undo Burrows-Wheeler transform */
530 previous = current; 544 previous = current;
531 pos = dbuf[pos]; 545 pos = dbuf[pos];
532 current = pos & 0xff; 546 current = (uint8_t)pos;
533 pos >>= 8; 547 pos >>= 8;
534 548
535 /* After 3 consecutive copies of the same byte, the 4th 549 /* After 3 consecutive copies of the same byte, the 4th
@@ -539,7 +553,7 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
539 if (current != previous) 553 if (current != previous)
540 bd->writeRunCountdown = 4; 554 bd->writeRunCountdown = 4;
541 } else { 555 } else {
542 556 /* Unlikely branch */
543 /* We have a repeated run, this byte indicates the count */ 557 /* We have a repeated run, this byte indicates the count */
544 bd->writeCopies = current; 558 bd->writeCopies = current;
545 current = previous; 559 current = previous;
@@ -551,9 +565,9 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
551 /* Subtract the 1 copy we'd output anyway to get extras */ 565 /* Subtract the 1 copy we'd output anyway to get extras */
552 --bd->writeCopies; 566 --bd->writeCopies;
553 } 567 }
554 } 568 } /* for(;;) */
555 569
556 /* Decompression of this block completed successfully */ 570 /* Decompression of this input block completed successfully */
557 bd->writeCRC = ~bd->writeCRC; 571 bd->writeCRC = ~bd->writeCRC;
558 bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC; 572 bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC;
559 573
@@ -565,16 +579,25 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
565 } 579 }
566 580
567 /* Refill the intermediate buffer by Huffman-decoding next block of input */ 581 /* Refill the intermediate buffer by Huffman-decoding next block of input */
568 /* (previous is just a convenient unused temp variable here) */ 582 {
569 previous = get_next_block(bd); 583 int r = get_next_block(bd);
570 if (previous) { 584 if (r) {
571 bd->writeCount = previous; 585 bd->writeCount = r;
572 return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount; 586 return (r != RETVAL_LAST_BLOCK) ? r : gotcount;
587 }
573 } 588 }
589
574 bd->writeCRC = ~0; 590 bd->writeCRC = ~0;
575 pos = bd->writePos; 591 pos = bd->writePos;
576 current = bd->writeCurrent; 592 current = bd->writeCurrent;
577 goto decode_next_byte; 593 goto decode_next_byte;
594
595 outbuf_full:
596 /* Output buffer is full, snapshot state and return */
597 bd->writePos = pos;
598 bd->writeCurrent = current;
599 bd->writeCopies++;
600 return gotcount;
578} 601}
579 602
580/* Allocate the structure, read file header. If in_fd==-1, inbuf must contain 603/* Allocate the structure, read file header. If in_fd==-1, inbuf must contain
@@ -607,7 +630,7 @@ int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd,
607 /* in this case, bd->inbuf is read-only */ 630 /* in this case, bd->inbuf is read-only */
608 bd->inbuf = (void*)inbuf; /* cast away const-ness */ 631 bd->inbuf = (void*)inbuf; /* cast away const-ness */
609 } else { 632 } else {
610 bd->inbuf = (unsigned char *)(bd + 1); 633 bd->inbuf = (uint8_t*)(bd + 1);
611 memcpy(bd->inbuf, inbuf, len); 634 memcpy(bd->inbuf, inbuf, len);
612 } 635 }
613 bd->inbufCount = len; 636 bd->inbufCount = len;
@@ -634,7 +657,7 @@ int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd,
634 bd->dbufSize = 100000 * (i - h0); 657 bd->dbufSize = 100000 * (i - h0);
635 658
636 /* Cannot use xmalloc - may leak bd in NOFORK case! */ 659 /* Cannot use xmalloc - may leak bd in NOFORK case! */
637 bd->dbuf = malloc_or_warn(bd->dbufSize * sizeof(int)); 660 bd->dbuf = malloc_or_warn(bd->dbufSize * sizeof(bd->dbuf[0]));
638 if (!bd->dbuf) { 661 if (!bd->dbuf) {
639 free(bd); 662 free(bd);
640 xfunc_die(); 663 xfunc_die();