diff options
author | jsing <> | 2023-01-20 17:31:52 +0000 |
---|---|---|
committer | jsing <> | 2023-01-20 17:31:52 +0000 |
commit | ec907bb8e44028294d6c2a6faf9c735ce8012e48 (patch) | |
tree | b7a2361e00d87650a48e90b0530c6a86d27e0039 /src/lib/libcrypto/bn/bn_asm.c | |
parent | a50b434b87829ee0d12767c21ae98194684ab720 (diff) | |
download | openbsd-ec907bb8e44028294d6c2a6faf9c735ce8012e48.tar.gz openbsd-ec907bb8e44028294d6c2a6faf9c735ce8012e48.tar.bz2 openbsd-ec907bb8e44028294d6c2a6faf9c735ce8012e48.zip |
Move bn_{mul,sqr}_comba{4,8}() from bn_asm.c to bn_mul.c/bn_sqr.c.

Wrap these in HAVE_BN_{MUL,SQR}_COMBA{4,8} defines. Add these defines to
bn_arch.h where the architecture currently provides its own version.

ok tb@
Diffstat (limited to 'src/lib/libcrypto/bn/bn_asm.c')
-rw-r--r-- | src/lib/libcrypto/bn/bn_asm.c | 300 |
1 file changed, 1 insertion, 299 deletions
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c
index 84063486b3..df4ddaea17 100644
--- a/src/lib/libcrypto/bn/bn_asm.c
+++ b/src/lib/libcrypto/bn/bn_asm.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_asm.c,v 1.18 2023/01/20 17:26:03 jsing Exp $ */ | 1 | /* $OpenBSD: bn_asm.c,v 1.19 2023/01/20 17:31:52 jsing Exp $ */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. | 3 | * All rights reserved. |
4 | * | 4 | * |
@@ -479,265 +479,6 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | |||
479 | 479 | ||
480 | #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT) | 480 | #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT) |
481 | 481 | ||
482 | #undef bn_mul_comba8 | ||
483 | #undef bn_mul_comba4 | ||
484 | #undef bn_sqr_comba8 | ||
485 | #undef bn_sqr_comba4 | ||
486 | |||
487 | void | ||
488 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
489 | { | ||
490 | BN_ULONG c1, c2, c3; | ||
491 | |||
492 | c1 = 0; | ||
493 | c2 = 0; | ||
494 | c3 = 0; | ||
495 | mul_add_c(a[0], b[0], c1, c2, c3); | ||
496 | r[0] = c1; | ||
497 | c1 = 0; | ||
498 | mul_add_c(a[0], b[1], c2, c3, c1); | ||
499 | mul_add_c(a[1], b[0], c2, c3, c1); | ||
500 | r[1] = c2; | ||
501 | c2 = 0; | ||
502 | mul_add_c(a[2], b[0], c3, c1, c2); | ||
503 | mul_add_c(a[1], b[1], c3, c1, c2); | ||
504 | mul_add_c(a[0], b[2], c3, c1, c2); | ||
505 | r[2] = c3; | ||
506 | c3 = 0; | ||
507 | mul_add_c(a[0], b[3], c1, c2, c3); | ||
508 | mul_add_c(a[1], b[2], c1, c2, c3); | ||
509 | mul_add_c(a[2], b[1], c1, c2, c3); | ||
510 | mul_add_c(a[3], b[0], c1, c2, c3); | ||
511 | r[3] = c1; | ||
512 | c1 = 0; | ||
513 | mul_add_c(a[4], b[0], c2, c3, c1); | ||
514 | mul_add_c(a[3], b[1], c2, c3, c1); | ||
515 | mul_add_c(a[2], b[2], c2, c3, c1); | ||
516 | mul_add_c(a[1], b[3], c2, c3, c1); | ||
517 | mul_add_c(a[0], b[4], c2, c3, c1); | ||
518 | r[4] = c2; | ||
519 | c2 = 0; | ||
520 | mul_add_c(a[0], b[5], c3, c1, c2); | ||
521 | mul_add_c(a[1], b[4], c3, c1, c2); | ||
522 | mul_add_c(a[2], b[3], c3, c1, c2); | ||
523 | mul_add_c(a[3], b[2], c3, c1, c2); | ||
524 | mul_add_c(a[4], b[1], c3, c1, c2); | ||
525 | mul_add_c(a[5], b[0], c3, c1, c2); | ||
526 | r[5] = c3; | ||
527 | c3 = 0; | ||
528 | mul_add_c(a[6], b[0], c1, c2, c3); | ||
529 | mul_add_c(a[5], b[1], c1, c2, c3); | ||
530 | mul_add_c(a[4], b[2], c1, c2, c3); | ||
531 | mul_add_c(a[3], b[3], c1, c2, c3); | ||
532 | mul_add_c(a[2], b[4], c1, c2, c3); | ||
533 | mul_add_c(a[1], b[5], c1, c2, c3); | ||
534 | mul_add_c(a[0], b[6], c1, c2, c3); | ||
535 | r[6] = c1; | ||
536 | c1 = 0; | ||
537 | mul_add_c(a[0], b[7], c2, c3, c1); | ||
538 | mul_add_c(a[1], b[6], c2, c3, c1); | ||
539 | mul_add_c(a[2], b[5], c2, c3, c1); | ||
540 | mul_add_c(a[3], b[4], c2, c3, c1); | ||
541 | mul_add_c(a[4], b[3], c2, c3, c1); | ||
542 | mul_add_c(a[5], b[2], c2, c3, c1); | ||
543 | mul_add_c(a[6], b[1], c2, c3, c1); | ||
544 | mul_add_c(a[7], b[0], c2, c3, c1); | ||
545 | r[7] = c2; | ||
546 | c2 = 0; | ||
547 | mul_add_c(a[7], b[1], c3, c1, c2); | ||
548 | mul_add_c(a[6], b[2], c3, c1, c2); | ||
549 | mul_add_c(a[5], b[3], c3, c1, c2); | ||
550 | mul_add_c(a[4], b[4], c3, c1, c2); | ||
551 | mul_add_c(a[3], b[5], c3, c1, c2); | ||
552 | mul_add_c(a[2], b[6], c3, c1, c2); | ||
553 | mul_add_c(a[1], b[7], c3, c1, c2); | ||
554 | r[8] = c3; | ||
555 | c3 = 0; | ||
556 | mul_add_c(a[2], b[7], c1, c2, c3); | ||
557 | mul_add_c(a[3], b[6], c1, c2, c3); | ||
558 | mul_add_c(a[4], b[5], c1, c2, c3); | ||
559 | mul_add_c(a[5], b[4], c1, c2, c3); | ||
560 | mul_add_c(a[6], b[3], c1, c2, c3); | ||
561 | mul_add_c(a[7], b[2], c1, c2, c3); | ||
562 | r[9] = c1; | ||
563 | c1 = 0; | ||
564 | mul_add_c(a[7], b[3], c2, c3, c1); | ||
565 | mul_add_c(a[6], b[4], c2, c3, c1); | ||
566 | mul_add_c(a[5], b[5], c2, c3, c1); | ||
567 | mul_add_c(a[4], b[6], c2, c3, c1); | ||
568 | mul_add_c(a[3], b[7], c2, c3, c1); | ||
569 | r[10] = c2; | ||
570 | c2 = 0; | ||
571 | mul_add_c(a[4], b[7], c3, c1, c2); | ||
572 | mul_add_c(a[5], b[6], c3, c1, c2); | ||
573 | mul_add_c(a[6], b[5], c3, c1, c2); | ||
574 | mul_add_c(a[7], b[4], c3, c1, c2); | ||
575 | r[11] = c3; | ||
576 | c3 = 0; | ||
577 | mul_add_c(a[7], b[5], c1, c2, c3); | ||
578 | mul_add_c(a[6], b[6], c1, c2, c3); | ||
579 | mul_add_c(a[5], b[7], c1, c2, c3); | ||
580 | r[12] = c1; | ||
581 | c1 = 0; | ||
582 | mul_add_c(a[6], b[7], c2, c3, c1); | ||
583 | mul_add_c(a[7], b[6], c2, c3, c1); | ||
584 | r[13] = c2; | ||
585 | c2 = 0; | ||
586 | mul_add_c(a[7], b[7], c3, c1, c2); | ||
587 | r[14] = c3; | ||
588 | r[15] = c1; | ||
589 | } | ||
590 | |||
591 | void | ||
592 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
593 | { | ||
594 | BN_ULONG c1, c2, c3; | ||
595 | |||
596 | c1 = 0; | ||
597 | c2 = 0; | ||
598 | c3 = 0; | ||
599 | mul_add_c(a[0], b[0], c1, c2, c3); | ||
600 | r[0] = c1; | ||
601 | c1 = 0; | ||
602 | mul_add_c(a[0], b[1], c2, c3, c1); | ||
603 | mul_add_c(a[1], b[0], c2, c3, c1); | ||
604 | r[1] = c2; | ||
605 | c2 = 0; | ||
606 | mul_add_c(a[2], b[0], c3, c1, c2); | ||
607 | mul_add_c(a[1], b[1], c3, c1, c2); | ||
608 | mul_add_c(a[0], b[2], c3, c1, c2); | ||
609 | r[2] = c3; | ||
610 | c3 = 0; | ||
611 | mul_add_c(a[0], b[3], c1, c2, c3); | ||
612 | mul_add_c(a[1], b[2], c1, c2, c3); | ||
613 | mul_add_c(a[2], b[1], c1, c2, c3); | ||
614 | mul_add_c(a[3], b[0], c1, c2, c3); | ||
615 | r[3] = c1; | ||
616 | c1 = 0; | ||
617 | mul_add_c(a[3], b[1], c2, c3, c1); | ||
618 | mul_add_c(a[2], b[2], c2, c3, c1); | ||
619 | mul_add_c(a[1], b[3], c2, c3, c1); | ||
620 | r[4] = c2; | ||
621 | c2 = 0; | ||
622 | mul_add_c(a[2], b[3], c3, c1, c2); | ||
623 | mul_add_c(a[3], b[2], c3, c1, c2); | ||
624 | r[5] = c3; | ||
625 | c3 = 0; | ||
626 | mul_add_c(a[3], b[3], c1, c2, c3); | ||
627 | r[6] = c1; | ||
628 | r[7] = c2; | ||
629 | } | ||
630 | |||
631 | void | ||
632 | bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | ||
633 | { | ||
634 | BN_ULONG c1, c2, c3; | ||
635 | |||
636 | c1 = 0; | ||
637 | c2 = 0; | ||
638 | c3 = 0; | ||
639 | sqr_add_c(a, 0, c1, c2, c3); | ||
640 | r[0] = c1; | ||
641 | c1 = 0; | ||
642 | sqr_add_c2(a, 1, 0, c2, c3, c1); | ||
643 | r[1] = c2; | ||
644 | c2 = 0; | ||
645 | sqr_add_c(a, 1, c3, c1, c2); | ||
646 | sqr_add_c2(a, 2, 0, c3, c1, c2); | ||
647 | r[2] = c3; | ||
648 | c3 = 0; | ||
649 | sqr_add_c2(a, 3, 0, c1, c2, c3); | ||
650 | sqr_add_c2(a, 2, 1, c1, c2, c3); | ||
651 | r[3] = c1; | ||
652 | c1 = 0; | ||
653 | sqr_add_c(a, 2, c2, c3, c1); | ||
654 | sqr_add_c2(a, 3, 1, c2, c3, c1); | ||
655 | sqr_add_c2(a, 4, 0, c2, c3, c1); | ||
656 | r[4] = c2; | ||
657 | c2 = 0; | ||
658 | sqr_add_c2(a, 5, 0, c3, c1, c2); | ||
659 | sqr_add_c2(a, 4, 1, c3, c1, c2); | ||
660 | sqr_add_c2(a, 3, 2, c3, c1, c2); | ||
661 | r[5] = c3; | ||
662 | c3 = 0; | ||
663 | sqr_add_c(a, 3, c1, c2, c3); | ||
664 | sqr_add_c2(a, 4, 2, c1, c2, c3); | ||
665 | sqr_add_c2(a, 5, 1, c1, c2, c3); | ||
666 | sqr_add_c2(a, 6, 0, c1, c2, c3); | ||
667 | r[6] = c1; | ||
668 | c1 = 0; | ||
669 | sqr_add_c2(a, 7, 0, c2, c3, c1); | ||
670 | sqr_add_c2(a, 6, 1, c2, c3, c1); | ||
671 | sqr_add_c2(a, 5, 2, c2, c3, c1); | ||
672 | sqr_add_c2(a, 4, 3, c2, c3, c1); | ||
673 | r[7] = c2; | ||
674 | c2 = 0; | ||
675 | sqr_add_c(a, 4, c3, c1, c2); | ||
676 | sqr_add_c2(a, 5, 3, c3, c1, c2); | ||
677 | sqr_add_c2(a, 6, 2, c3, c1, c2); | ||
678 | sqr_add_c2(a, 7, 1, c3, c1, c2); | ||
679 | r[8] = c3; | ||
680 | c3 = 0; | ||
681 | sqr_add_c2(a, 7, 2, c1, c2, c3); | ||
682 | sqr_add_c2(a, 6, 3, c1, c2, c3); | ||
683 | sqr_add_c2(a, 5, 4, c1, c2, c3); | ||
684 | r[9] = c1; | ||
685 | c1 = 0; | ||
686 | sqr_add_c(a, 5, c2, c3, c1); | ||
687 | sqr_add_c2(a, 6, 4, c2, c3, c1); | ||
688 | sqr_add_c2(a, 7, 3, c2, c3, c1); | ||
689 | r[10] = c2; | ||
690 | c2 = 0; | ||
691 | sqr_add_c2(a, 7, 4, c3, c1, c2); | ||
692 | sqr_add_c2(a, 6, 5, c3, c1, c2); | ||
693 | r[11] = c3; | ||
694 | c3 = 0; | ||
695 | sqr_add_c(a, 6, c1, c2, c3); | ||
696 | sqr_add_c2(a, 7, 5, c1, c2, c3); | ||
697 | r[12] = c1; | ||
698 | c1 = 0; | ||
699 | sqr_add_c2(a, 7, 6, c2, c3, c1); | ||
700 | r[13] = c2; | ||
701 | c2 = 0; | ||
702 | sqr_add_c(a, 7, c3, c1, c2); | ||
703 | r[14] = c3; | ||
704 | r[15] = c1; | ||
705 | } | ||
706 | |||
707 | void | ||
708 | bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | ||
709 | { | ||
710 | BN_ULONG c1, c2, c3; | ||
711 | |||
712 | c1 = 0; | ||
713 | c2 = 0; | ||
714 | c3 = 0; | ||
715 | sqr_add_c(a, 0, c1, c2, c3); | ||
716 | r[0] = c1; | ||
717 | c1 = 0; | ||
718 | sqr_add_c2(a, 1, 0, c2, c3, c1); | ||
719 | r[1] = c2; | ||
720 | c2 = 0; | ||
721 | sqr_add_c(a, 1, c3, c1, c2); | ||
722 | sqr_add_c2(a, 2, 0, c3, c1, c2); | ||
723 | r[2] = c3; | ||
724 | c3 = 0; | ||
725 | sqr_add_c2(a, 3, 0, c1, c2, c3); | ||
726 | sqr_add_c2(a, 2, 1, c1, c2, c3); | ||
727 | r[3] = c1; | ||
728 | c1 = 0; | ||
729 | sqr_add_c(a, 2, c2, c3, c1); | ||
730 | sqr_add_c2(a, 3, 1, c2, c3, c1); | ||
731 | r[4] = c2; | ||
732 | c2 = 0; | ||
733 | sqr_add_c2(a, 3, 2, c3, c1, c2); | ||
734 | r[5] = c3; | ||
735 | c3 = 0; | ||
736 | sqr_add_c(a, 3, c1, c2, c3); | ||
737 | r[6] = c1; | ||
738 | r[7] = c2; | ||
739 | } | ||
740 | |||
741 | #ifdef OPENSSL_NO_ASM | 482 | #ifdef OPENSSL_NO_ASM |
742 | #ifdef OPENSSL_BN_ASM_MONT | 483 | #ifdef OPENSSL_BN_ASM_MONT |
743 | /* | 484 | /* |
@@ -853,45 +594,6 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U | |||
853 | 594 | ||
854 | #else /* !BN_MUL_COMBA */ | 595 | #else /* !BN_MUL_COMBA */ |
855 | 596 | ||
856 | /* hmm... is it faster just to do a multiply? */ | ||
857 | #undef bn_sqr_comba4 | ||
858 | void | ||
859 | bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | ||
860 | { | ||
861 | BN_ULONG t[8]; | ||
862 | bn_sqr_normal(r, a, 4, t); | ||
863 | } | ||
864 | |||
865 | #undef bn_sqr_comba8 | ||
866 | void | ||
867 | bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | ||
868 | { | ||
869 | BN_ULONG t[16]; | ||
870 | bn_sqr_normal(r, a, 8, t); | ||
871 | } | ||
872 | |||
873 | void | ||
874 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
875 | { | ||
876 | r[4] = bn_mul_words(&(r[0]), a, 4, b[0]); | ||
877 | r[5] = bn_mul_add_words(&(r[1]), a, 4, b[1]); | ||
878 | r[6] = bn_mul_add_words(&(r[2]), a, 4, b[2]); | ||
879 | r[7] = bn_mul_add_words(&(r[3]), a, 4, b[3]); | ||
880 | } | ||
881 | |||
882 | void | ||
883 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
884 | { | ||
885 | r[8] = bn_mul_words(&(r[0]), a, 8, b[0]); | ||
886 | r[9] = bn_mul_add_words(&(r[1]), a, 8, b[1]); | ||
887 | r[10] = bn_mul_add_words(&(r[2]), a, 8, b[2]); | ||
888 | r[11] = bn_mul_add_words(&(r[3]), a, 8, b[3]); | ||
889 | r[12] = bn_mul_add_words(&(r[4]), a, 8, b[4]); | ||
890 | r[13] = bn_mul_add_words(&(r[5]), a, 8, b[5]); | ||
891 | r[14] = bn_mul_add_words(&(r[6]), a, 8, b[6]); | ||
892 | r[15] = bn_mul_add_words(&(r[7]), a, 8, b[7]); | ||
893 | } | ||
894 | |||
895 | #ifdef OPENSSL_NO_ASM | 597 | #ifdef OPENSSL_NO_ASM |
896 | #ifdef OPENSSL_BN_ASM_MONT | 598 | #ifdef OPENSSL_BN_ASM_MONT |
897 | int | 599 | int |