aboutsummaryrefslogtreecommitdiff
path: root/crc32.c
diff options
context:
space:
mode:
Diffstat (limited to 'crc32.c')
-rw-r--r--crc32.c114
1 files changed, 114 insertions, 0 deletions
diff --git a/crc32.c b/crc32.c
index f6cd52f..2d20829 100644
--- a/crc32.c
+++ b/crc32.c
@@ -617,6 +617,118 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
617 return (const z_crc_t FAR *)crc_table; 617 return (const z_crc_t FAR *)crc_table;
618} 618}
619 619
620/* =========================================================================
621 * Use ARM machine instructions if requested. This will compute the CRC about
622 * ten times faster than the braided calculation. This code does not check for
623 * the presence of the CRC instruction. Compile with care.
624 */
625#if defined(Z_ARM_CRC32) && defined(__aarch64__) && W == 8
626
627/*
628 Constants empirically determined to maximize speed. These values are from
629 measurements on a Cortex-A57. Your mileage may vary.
630 */
631#define Z_BATCH 3990 /* number of words in a batch */
632#define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */
633#define Z_BATCH_MIN 800 /* fewest words in a final batch */
634
635unsigned long ZEXPORT crc32_z(crc, buf, len)
636 unsigned long crc;
637 const unsigned char FAR *buf;
638 z_size_t len;
639{
640 z_crc_t val;
641 z_word_t crc1, crc2;
642 const z_word_t *word;
643 z_word_t val0, val1, val2;
644 z_size_t last, last2, i;
645 z_size_t num;
646
647 /* Return initial CRC, if requested. */
648 if (buf == Z_NULL) return 0;
649
650#ifdef DYNAMIC_CRC_TABLE
651 once(&made, make_crc_table);
652#endif /* DYNAMIC_CRC_TABLE */
653
654 /* Pre-condition the CRC */
655 crc ^= 0xffffffff;
656
657 /* Compute the CRC up to a word boundary. */
658 while (len && ((z_size_t)buf & 7) != 0) {
659 len--;
660 val = *buf++;
661 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
662 }
663
664 /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */
665 word = (z_word_t const *)buf;
666 num = len >> 3;
667 len &= 7;
668
669 /* Do three interleaved CRCs to realize the throughput of one crc32x
670 instruction per cycle. Each CRC is calcuated on Z_BATCH words. The three
671 CRCs are combined into a single CRC after each set of batches. */
672 while (num >= 3 * Z_BATCH) {
673 crc1 = 0;
674 crc2 = 0;
675 for (i = 0; i < Z_BATCH; i++) {
676 val0 = word[i];
677 val1 = word[i + Z_BATCH];
678 val2 = word[i + 2 * Z_BATCH];
679 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
680 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
681 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
682 }
683 word += 3 * Z_BATCH;
684 num -= 3 * Z_BATCH;
685 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1;
686 crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2;
687 }
688
689 /* Do one last smaller batch with the remaining words, if there are enough
690 to pay for the combination of CRCs. */
691 last = num / 3;
692 if (last >= Z_BATCH_MIN) {
693 last2 = last << 1;
694 crc1 = 0;
695 crc2 = 0;
696 for (i = 0; i < last; i++) {
697 val0 = word[i];
698 val1 = word[i + last];
699 val2 = word[i + last2];
700 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
701 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1));
702 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2));
703 }
704 word += 3 * last;
705 num -= 3 * last;
706 val = x2nmodp(last, 6);
707 crc = multmodp(val, crc) ^ crc1;
708 crc = multmodp(val, crc) ^ crc2;
709 }
710
711 /* Compute the CRC on any remaining words. */
712 for (i = 0; i < num; i++) {
713 val0 = word[i];
714 __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0));
715 }
716 word += num;
717
718 /* Complete the CRC on any remaining bytes. */
719 buf = (const unsigned char FAR *)word;
720 while (len) {
721 len--;
722 val = *buf++;
723 __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val));
724 }
725
726 /* Return the CRC, post-conditioned. */
727 return crc ^ 0xffffffff;
728}
729
730#else
731
620/* ========================================================================= */ 732/* ========================================================================= */
621unsigned long ZEXPORT crc32_z(crc, buf, len) 733unsigned long ZEXPORT crc32_z(crc, buf, len)
622 unsigned long crc; 734 unsigned long crc;
@@ -939,6 +1051,8 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
939 return crc ^ 0xffffffff; 1051 return crc ^ 0xffffffff;
940} 1052}
941 1053
1054#endif
1055
942/* ========================================================================= */ 1056/* ========================================================================= */
943unsigned long ZEXPORT crc32(crc, buf, len) 1057unsigned long ZEXPORT crc32(crc, buf, len)
944 unsigned long crc; 1058 unsigned long crc;