diff options
Diffstat (limited to 'crc32.c')
-rw-r--r-- | crc32.c | 114 |
1 files changed, 114 insertions, 0 deletions
@@ -617,6 +617,118 @@ const z_crc_t FAR * ZEXPORT get_crc_table() | |||
617 | return (const z_crc_t FAR *)crc_table; | 617 | return (const z_crc_t FAR *)crc_table; |
618 | } | 618 | } |
619 | 619 | ||
620 | /* ========================================================================= | ||
621 | * Use ARM machine instructions if requested. This will compute the CRC about | ||
622 | * ten times faster than the braided calculation. This code does not check for | ||
623 | * the presence of the CRC instruction. Compile with care. | ||
624 | */ | ||
625 | #if defined(Z_ARM_CRC32) && defined(__aarch64__) && W == 8 | ||
626 | |||
627 | /* | ||
628 | Constants empirically determined to maximize speed. These values are from | ||
629 | measurements on a Cortex-A57. Your mileage may vary. | ||
630 | */ | ||
631 | #define Z_BATCH 3990 /* number of 64-bit words each of the three interleaved CRCs covers in one full batch */ | |
632 | #define Z_BATCH_ZEROS 0xa10d3d0c /* multmodp() multiplier used to combine the CRCs of a full batch; computed from Z_BATCH = 3990, so recompute it if Z_BATCH changes */ | |
633 | #define Z_BATCH_MIN 800 /* fewest words per CRC in a final, smaller batch; with fewer than this the remaining words are run through a single CRC instead */ | |
634 | |||
635 | unsigned long ZEXPORT crc32_z(crc, buf, len) | ||
636 | unsigned long crc; | ||
637 | const unsigned char FAR *buf; | ||
638 | z_size_t len; | ||
639 | { | ||
640 | z_crc_t val; | ||
641 | z_word_t crc1, crc2; | ||
642 | const z_word_t *word; | ||
643 | z_word_t val0, val1, val2; | ||
644 | z_size_t last, last2, i; | ||
645 | z_size_t num; | ||
646 | |||
647 | /* Return initial CRC, if requested. */ | ||
648 | if (buf == Z_NULL) return 0; | ||
649 | |||
650 | #ifdef DYNAMIC_CRC_TABLE | ||
651 | once(&made, make_crc_table); | ||
652 | #endif /* DYNAMIC_CRC_TABLE */ | ||
653 | |||
654 | /* Pre-condition the CRC */ | ||
655 | crc ^= 0xffffffff; | ||
656 | |||
657 | /* Compute the CRC up to a word boundary. */ | ||
658 | while (len && ((z_size_t)buf & 7) != 0) { | ||
659 | len--; | ||
660 | val = *buf++; | ||
661 | __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); | ||
662 | } | ||
663 | |||
664 | /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */ | ||
665 | word = (z_word_t const *)buf; | ||
666 | num = len >> 3; | ||
667 | len &= 7; | ||
668 | |||
669 | /* Do three interleaved CRCs to realize the throughput of one crc32x | ||
670 | instruction per cycle. Each CRC is calcuated on Z_BATCH words. The three | ||
671 | CRCs are combined into a single CRC after each set of batches. */ | ||
672 | while (num >= 3 * Z_BATCH) { | ||
673 | crc1 = 0; | ||
674 | crc2 = 0; | ||
675 | for (i = 0; i < Z_BATCH; i++) { | ||
676 | val0 = word[i]; | ||
677 | val1 = word[i + Z_BATCH]; | ||
678 | val2 = word[i + 2 * Z_BATCH]; | ||
679 | __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); | ||
680 | __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); | ||
681 | __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); | ||
682 | } | ||
683 | word += 3 * Z_BATCH; | ||
684 | num -= 3 * Z_BATCH; | ||
685 | crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1; | ||
686 | crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2; | ||
687 | } | ||
688 | |||
689 | /* Do one last smaller batch with the remaining words, if there are enough | ||
690 | to pay for the combination of CRCs. */ | ||
691 | last = num / 3; | ||
692 | if (last >= Z_BATCH_MIN) { | ||
693 | last2 = last << 1; | ||
694 | crc1 = 0; | ||
695 | crc2 = 0; | ||
696 | for (i = 0; i < last; i++) { | ||
697 | val0 = word[i]; | ||
698 | val1 = word[i + last]; | ||
699 | val2 = word[i + last2]; | ||
700 | __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); | ||
701 | __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); | ||
702 | __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); | ||
703 | } | ||
704 | word += 3 * last; | ||
705 | num -= 3 * last; | ||
706 | val = x2nmodp(last, 6); | ||
707 | crc = multmodp(val, crc) ^ crc1; | ||
708 | crc = multmodp(val, crc) ^ crc2; | ||
709 | } | ||
710 | |||
711 | /* Compute the CRC on any remaining words. */ | ||
712 | for (i = 0; i < num; i++) { | ||
713 | val0 = word[i]; | ||
714 | __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); | ||
715 | } | ||
716 | word += num; | ||
717 | |||
718 | /* Complete the CRC on any remaining bytes. */ | ||
719 | buf = (const unsigned char FAR *)word; | ||
720 | while (len) { | ||
721 | len--; | ||
722 | val = *buf++; | ||
723 | __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); | ||
724 | } | ||
725 | |||
726 | /* Return the CRC, post-conditioned. */ | ||
727 | return crc ^ 0xffffffff; | ||
728 | } | ||
729 | |||
730 | #else | ||
731 | |||
620 | /* ========================================================================= */ | 732 | /* ========================================================================= */ |
621 | unsigned long ZEXPORT crc32_z(crc, buf, len) | 733 | unsigned long ZEXPORT crc32_z(crc, buf, len) |
622 | unsigned long crc; | 734 | unsigned long crc; |
@@ -939,6 +1051,8 @@ unsigned long ZEXPORT crc32_z(crc, buf, len) | |||
939 | return crc ^ 0xffffffff; | 1051 | return crc ^ 0xffffffff; |
940 | } | 1052 | } |
941 | 1053 | ||
1054 | #endif | ||
1055 | |||
942 | /* ========================================================================= */ | 1056 | /* ========================================================================= */ |
943 | unsigned long ZEXPORT crc32(crc, buf, len) | 1057 | unsigned long ZEXPORT crc32(crc, buf, len) |
944 | unsigned long crc; | 1058 | unsigned long crc; |