diff options
| author | Denys Vlasenko <dvlasenk@redhat.com> | 2010-10-29 18:16:29 +0200 |
|---|---|---|
| committer | Denys Vlasenko <dvlasenk@redhat.com> | 2010-10-29 18:16:29 +0200 |
| commit | bf3bec51fced9dbc800954885191e5671cb485ef (patch) | |
| tree | 7285df6a716e9972ec208230fc5cfa6a711ca16c | |
| parent | f29a1c56568e2cb572ea8bc47b29f70947abca7a (diff) | |
| download | busybox-w32-bf3bec51fced9dbc800954885191e5671cb485ef.tar.gz busybox-w32-bf3bec51fced9dbc800954885191e5671cb485ef.tar.bz2 busybox-w32-bf3bec51fced9dbc800954885191e5671cb485ef.zip | |
decompress_bunzip2: keep bd->writeCRC in a CPU register in the hot loop
Code size change: -5 bytes on 64-bit, +7 bytes on 32-bit.
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
| -rw-r--r-- | archival/libunarchive/decompress_bunzip2.c | 49 |
1 file changed, 28 insertions, 21 deletions
diff --git a/archival/libunarchive/decompress_bunzip2.c b/archival/libunarchive/decompress_bunzip2.c index 3a5d23345..8d7746a79 100644 --- a/archival/libunarchive/decompress_bunzip2.c +++ b/archival/libunarchive/decompress_bunzip2.c | |||
| @@ -492,15 +492,20 @@ static int get_next_block(bunzip_data *bd) | |||
| 492 | int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | 492 | int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) |
| 493 | { | 493 | { |
| 494 | const uint32_t *dbuf; | 494 | const uint32_t *dbuf; |
| 495 | int pos, current, previous, gotcount; | 495 | int pos, current, previous, out_count; |
| 496 | uint32_t CRC; | ||
| 496 | 497 | ||
| 497 | /* If last read was short due to end of file, return last block now */ | 498 | /* If we already have error/end indicator, return it */ |
| 498 | if (bd->writeCount < 0) return bd->writeCount; | 499 | if (bd->writeCount < 0) |
| 500 | return bd->writeCount; | ||
| 499 | 501 | ||
| 500 | gotcount = 0; | 502 | out_count = 0; |
| 501 | dbuf = bd->dbuf; | 503 | dbuf = bd->dbuf; |
| 504 | |||
| 505 | /* Register-cached state (hopefully): */ | ||
| 502 | pos = bd->writePos; | 506 | pos = bd->writePos; |
| 503 | current = bd->writeCurrent; | 507 | current = bd->writeCurrent; |
| 508 | CRC = bd->writeCRC; /* small loss on x86-32 (not enough regs), win on x86-64 */ | ||
| 504 | 509 | ||
| 505 | /* We will always have pending decoded data to write into the output | 510 | /* We will always have pending decoded data to write into the output |
| 506 | buffer unless this is the very first call (in which case we haven't | 511 | buffer unless this is the very first call (in which case we haven't |
| @@ -514,8 +519,8 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
| 514 | /* Loop outputting bytes */ | 519 | /* Loop outputting bytes */ |
| 515 | for (;;) { | 520 | for (;;) { |
| 516 | 521 | ||
| 517 | /* If the output buffer is full, snapshot state and return */ | 522 | /* If the output buffer is full, save cached state and return */ |
| 518 | if (gotcount >= len) { | 523 | if (out_count >= len) { |
| 519 | /* Unlikely branch. | 524 | /* Unlikely branch. |
| 520 | * Use of "goto" instead of keeping code here | 525 | * Use of "goto" instead of keeping code here |
| 521 | * helps compiler to realize this. */ | 526 | * helps compiler to realize this. */ |
| @@ -523,17 +528,16 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
| 523 | } | 528 | } |
| 524 | 529 | ||
| 525 | /* Write next byte into output buffer, updating CRC */ | 530 | /* Write next byte into output buffer, updating CRC */ |
| 526 | outbuf[gotcount++] = current; | 531 | outbuf[out_count++] = current; |
| 527 | bd->writeCRC = (bd->writeCRC << 8) | 532 | CRC = (CRC << 8) ^ bd->crc32Table[(CRC >> 24) ^ current]; |
| 528 | ^ bd->crc32Table[(bd->writeCRC >> 24) ^ current]; | ||
| 529 | 533 | ||
| 530 | /* Loop now if we're outputting multiple copies of this byte */ | 534 | /* Loop now if we're outputting multiple copies of this byte */ |
| 531 | if (bd->writeCopies) { | 535 | if (bd->writeCopies) { |
| 532 | /* Unlikely branch */ | 536 | /* Unlikely branch */ |
| 533 | /*--bd->writeCopies;*/ | 537 | /*--bd->writeCopies;*/ |
| 534 | /*continue;*/ | 538 | /*continue;*/ |
| 535 | /* Same, but (ab)using other existing --writeCopies operation. | 539 | /* Same, but (ab)using other existing --writeCopies operation |
| 536 | * Luckily, this also compiles into just one branch insn: */ | 540 | * (and this if() compiles into just test+branch pair): */ |
| 537 | goto dec_writeCopies; | 541 | goto dec_writeCopies; |
| 538 | } | 542 | } |
| 539 | decode_next_byte: | 543 | decode_next_byte: |
| @@ -549,7 +553,7 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
| 549 | /* After 3 consecutive copies of the same byte, the 4th | 553 | /* After 3 consecutive copies of the same byte, the 4th |
| 550 | * is a repeat count. We count down from 4 instead | 554 | * is a repeat count. We count down from 4 instead |
| 551 | * of counting up because testing for non-zero is faster */ | 555 | * of counting up because testing for non-zero is faster */ |
| 552 | if (--bd->writeRunCountdown) { | 556 | if (--bd->writeRunCountdown != 0) { |
| 553 | if (current != previous) | 557 | if (current != previous) |
| 554 | bd->writeRunCountdown = 4; | 558 | bd->writeRunCountdown = 4; |
| 555 | } else { | 559 | } else { |
| @@ -568,11 +572,11 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
| 568 | } /* for(;;) */ | 572 | } /* for(;;) */ |
| 569 | 573 | ||
| 570 | /* Decompression of this input block completed successfully */ | 574 | /* Decompression of this input block completed successfully */ |
| 571 | bd->writeCRC = ~bd->writeCRC; | 575 | bd->writeCRC = CRC = ~CRC; |
| 572 | bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC; | 576 | bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ CRC; |
| 573 | 577 | ||
| 574 | /* If this block had a CRC error, force file level CRC error. */ | 578 | /* If this block had a CRC error, force file level CRC error */ |
| 575 | if (bd->writeCRC != bd->headerCRC) { | 579 | if (CRC != bd->headerCRC) { |
| 576 | bd->totalCRC = bd->headerCRC + 1; | 580 | bd->totalCRC = bd->headerCRC + 1; |
| 577 | return RETVAL_LAST_BLOCK; | 581 | return RETVAL_LAST_BLOCK; |
| 578 | } | 582 | } |
| @@ -581,23 +585,26 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
| 581 | /* Refill the intermediate buffer by Huffman-decoding next block of input */ | 585 | /* Refill the intermediate buffer by Huffman-decoding next block of input */ |
| 582 | { | 586 | { |
| 583 | int r = get_next_block(bd); | 587 | int r = get_next_block(bd); |
| 584 | if (r) { | 588 | if (r) { /* error/end */ |
| 585 | bd->writeCount = r; | 589 | bd->writeCount = r; |
| 586 | return (r != RETVAL_LAST_BLOCK) ? r : gotcount; | 590 | return (r != RETVAL_LAST_BLOCK) ? r : out_count; |
| 587 | } | 591 | } |
| 588 | } | 592 | } |
| 589 | 593 | ||
| 590 | bd->writeCRC = ~0; | 594 | CRC = ~0; |
| 591 | pos = bd->writePos; | 595 | pos = bd->writePos; |
| 592 | current = bd->writeCurrent; | 596 | current = bd->writeCurrent; |
| 593 | goto decode_next_byte; | 597 | goto decode_next_byte; |
| 594 | 598 | ||
| 595 | outbuf_full: | 599 | outbuf_full: |
| 596 | /* Output buffer is full, snapshot state and return */ | 600 | /* Output buffer is full, save cached state and return */ |
| 597 | bd->writePos = pos; | 601 | bd->writePos = pos; |
| 598 | bd->writeCurrent = current; | 602 | bd->writeCurrent = current; |
| 603 | bd->writeCRC = CRC; | ||
| 604 | |||
| 599 | bd->writeCopies++; | 605 | bd->writeCopies++; |
| 600 | return gotcount; | 606 | |
| 607 | return out_count; | ||
| 601 | } | 608 | } |
| 602 | 609 | ||
| 603 | /* Allocate the structure, read file header. If in_fd==-1, inbuf must contain | 610 | /* Allocate the structure, read file header. If in_fd==-1, inbuf must contain |
