diff options
author | Denys Vlasenko <dvlasenk@redhat.com> | 2010-10-29 18:16:29 +0200 |
---|---|---|
committer | Denys Vlasenko <dvlasenk@redhat.com> | 2010-10-29 18:16:29 +0200 |
commit | bf3bec51fced9dbc800954885191e5671cb485ef (patch) | |
tree | 7285df6a716e9972ec208230fc5cfa6a711ca16c | |
parent | f29a1c56568e2cb572ea8bc47b29f70947abca7a (diff) | |
download | busybox-w32-bf3bec51fced9dbc800954885191e5671cb485ef.tar.gz busybox-w32-bf3bec51fced9dbc800954885191e5671cb485ef.tar.bz2 busybox-w32-bf3bec51fced9dbc800954885191e5671cb485ef.zip |
decompress_bunzip2: keep bd->writeCRC in CPU reg in the hot loop
-5 bytes on 64-bit, +7 bytes on 32-bit.
Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
-rw-r--r-- | archival/libunarchive/decompress_bunzip2.c | 49 |
1 files changed, 28 insertions, 21 deletions
diff --git a/archival/libunarchive/decompress_bunzip2.c b/archival/libunarchive/decompress_bunzip2.c | |||
index 3a5d23345..8d7746a79 100644 | |||
--- a/archival/libunarchive/decompress_bunzip2.c | |||
+++ b/archival/libunarchive/decompress_bunzip2.c | |||
@@ -492,15 +492,20 @@ static int get_next_block(bunzip_data *bd) | |||
492 | int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | 492 | int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) |
493 | { | 493 | { |
494 | const uint32_t *dbuf; | 494 | const uint32_t *dbuf; |
495 | int pos, current, previous, gotcount; | 495 | int pos, current, previous, out_count; |
496 | uint32_t CRC; | ||
496 | 497 | ||
497 | /* If last read was short due to end of file, return last block now */ | 498 | /* If we already have error/end indicator, return it */ |
498 | if (bd->writeCount < 0) return bd->writeCount; | 499 | if (bd->writeCount < 0) |
500 | return bd->writeCount; | ||
499 | 501 | ||
500 | gotcount = 0; | 502 | out_count = 0; |
501 | dbuf = bd->dbuf; | 503 | dbuf = bd->dbuf; |
504 | |||
505 | /* Register-cached state (hopefully): */ | ||
502 | pos = bd->writePos; | 506 | pos = bd->writePos; |
503 | current = bd->writeCurrent; | 507 | current = bd->writeCurrent; |
508 | CRC = bd->writeCRC; /* small loss on x86-32 (not enough regs), win on x86-64 */ | ||
504 | 509 | ||
505 | /* We will always have pending decoded data to write into the output | 510 | /* We will always have pending decoded data to write into the output |
506 | buffer unless this is the very first call (in which case we haven't | 511 | buffer unless this is the very first call (in which case we haven't |
@@ -514,8 +519,8 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
514 | /* Loop outputting bytes */ | 519 | /* Loop outputting bytes */ |
515 | for (;;) { | 520 | for (;;) { |
516 | 521 | ||
517 | /* If the output buffer is full, snapshot state and return */ | 522 | /* If the output buffer is full, save cached state and return */ |
518 | if (gotcount >= len) { | 523 | if (out_count >= len) { |
519 | /* Unlikely branch. | 524 | /* Unlikely branch. |
520 | * Use of "goto" instead of keeping code here | 525 | * Use of "goto" instead of keeping code here |
521 | * helps compiler to realize this. */ | 526 | * helps compiler to realize this. */ |
@@ -523,17 +528,16 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
523 | } | 528 | } |
524 | 529 | ||
525 | /* Write next byte into output buffer, updating CRC */ | 530 | /* Write next byte into output buffer, updating CRC */ |
526 | outbuf[gotcount++] = current; | 531 | outbuf[out_count++] = current; |
527 | bd->writeCRC = (bd->writeCRC << 8) | 532 | CRC = (CRC << 8) ^ bd->crc32Table[(CRC >> 24) ^ current]; |
528 | ^ bd->crc32Table[(bd->writeCRC >> 24) ^ current]; | ||
529 | 533 | ||
530 | /* Loop now if we're outputting multiple copies of this byte */ | 534 | /* Loop now if we're outputting multiple copies of this byte */ |
531 | if (bd->writeCopies) { | 535 | if (bd->writeCopies) { |
532 | /* Unlikely branch */ | 536 | /* Unlikely branch */ |
533 | /*--bd->writeCopies;*/ | 537 | /*--bd->writeCopies;*/ |
534 | /*continue;*/ | 538 | /*continue;*/ |
535 | /* Same, but (ab)using other existing --writeCopies operation. | 539 | /* Same, but (ab)using other existing --writeCopies operation |
536 | * Luckily, this also compiles into just one branch insn: */ | 540 | * (and this if() compiles into just test+branch pair): */ |
537 | goto dec_writeCopies; | 541 | goto dec_writeCopies; |
538 | } | 542 | } |
539 | decode_next_byte: | 543 | decode_next_byte: |
@@ -549,7 +553,7 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
549 | /* After 3 consecutive copies of the same byte, the 4th | 553 | /* After 3 consecutive copies of the same byte, the 4th |
550 | * is a repeat count. We count down from 4 instead | 554 | * is a repeat count. We count down from 4 instead |
551 | * of counting up because testing for non-zero is faster */ | 555 | * of counting up because testing for non-zero is faster */ |
552 | if (--bd->writeRunCountdown) { | 556 | if (--bd->writeRunCountdown != 0) { |
553 | if (current != previous) | 557 | if (current != previous) |
554 | bd->writeRunCountdown = 4; | 558 | bd->writeRunCountdown = 4; |
555 | } else { | 559 | } else { |
@@ -568,11 +572,11 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
568 | } /* for(;;) */ | 572 | } /* for(;;) */ |
569 | 573 | ||
570 | /* Decompression of this input block completed successfully */ | 574 | /* Decompression of this input block completed successfully */ |
571 | bd->writeCRC = ~bd->writeCRC; | 575 | bd->writeCRC = CRC = ~CRC; |
572 | bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC; | 576 | bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ CRC; |
573 | 577 | ||
574 | /* If this block had a CRC error, force file level CRC error. */ | 578 | /* If this block had a CRC error, force file level CRC error */ |
575 | if (bd->writeCRC != bd->headerCRC) { | 579 | if (CRC != bd->headerCRC) { |
576 | bd->totalCRC = bd->headerCRC + 1; | 580 | bd->totalCRC = bd->headerCRC + 1; |
577 | return RETVAL_LAST_BLOCK; | 581 | return RETVAL_LAST_BLOCK; |
578 | } | 582 | } |
@@ -581,23 +585,26 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) | |||
581 | /* Refill the intermediate buffer by Huffman-decoding next block of input */ | 585 | /* Refill the intermediate buffer by Huffman-decoding next block of input */ |
582 | { | 586 | { |
583 | int r = get_next_block(bd); | 587 | int r = get_next_block(bd); |
584 | if (r) { | 588 | if (r) { /* error/end */ |
585 | bd->writeCount = r; | 589 | bd->writeCount = r; |
586 | return (r != RETVAL_LAST_BLOCK) ? r : gotcount; | 590 | return (r != RETVAL_LAST_BLOCK) ? r : out_count; |
587 | } | 591 | } |
588 | } | 592 | } |
589 | 593 | ||
590 | bd->writeCRC = ~0; | 594 | CRC = ~0; |
591 | pos = bd->writePos; | 595 | pos = bd->writePos; |
592 | current = bd->writeCurrent; | 596 | current = bd->writeCurrent; |
593 | goto decode_next_byte; | 597 | goto decode_next_byte; |
594 | 598 | ||
595 | outbuf_full: | 599 | outbuf_full: |
596 | /* Output buffer is full, snapshot state and return */ | 600 | /* Output buffer is full, save cached state and return */ |
597 | bd->writePos = pos; | 601 | bd->writePos = pos; |
598 | bd->writeCurrent = current; | 602 | bd->writeCurrent = current; |
603 | bd->writeCRC = CRC; | ||
604 | |||
599 | bd->writeCopies++; | 605 | bd->writeCopies++; |
600 | return gotcount; | 606 | |
607 | return out_count; | ||
601 | } | 608 | } |
602 | 609 | ||
603 | /* Allocate the structure, read file header. If in_fd==-1, inbuf must contain | 610 | /* Allocate the structure, read file header. If in_fd==-1, inbuf must contain |