summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author jsing <> 2023-01-20 17:31:52 +0000
committer jsing <> 2023-01-20 17:31:52 +0000
commit ec907bb8e44028294d6c2a6faf9c735ce8012e48 (patch)
tree b7a2361e00d87650a48e90b0530c6a86d27e0039
parent a50b434b87829ee0d12767c21ae98194684ab720 (diff)
download openbsd-ec907bb8e44028294d6c2a6faf9c735ce8012e48.tar.gz
openbsd-ec907bb8e44028294d6c2a6faf9c735ce8012e48.tar.bz2
openbsd-ec907bb8e44028294d6c2a6faf9c735ce8012e48.zip
Move bn_{mul,sqr}_comba{4,8}() from bn_asm.c to bn_mul.c/bn_sqr.c.
Wrap these in HAVE_BN_{MUL,SQR}_COMBA{4,8} defines. Add these defines to bn_arch.h where the architecture currently provides its own version. ok tb@
-rw-r--r-- src/lib/libcrypto/bn/arch/amd64/bn_arch.h 8
-rw-r--r-- src/lib/libcrypto/bn/arch/i386/bn_arch.h 8
-rw-r--r-- src/lib/libcrypto/bn/arch/mips64/bn_arch.h 8
-rw-r--r-- src/lib/libcrypto/bn/arch/powerpc/bn_arch.h 8
-rw-r--r-- src/lib/libcrypto/bn/arch/sparc/bn_arch.h 8
-rw-r--r-- src/lib/libcrypto/bn/bn_asm.c 300
-rw-r--r-- src/lib/libcrypto/bn/bn_mul.c 151
-rw-r--r-- src/lib/libcrypto/bn/bn_sqr.c 117
8 files changed, 302 insertions, 306 deletions
diff --git a/src/lib/libcrypto/bn/arch/amd64/bn_arch.h b/src/lib/libcrypto/bn/arch/amd64/bn_arch.h
index 136adf0e97..17d22f3cec 100644
--- a/src/lib/libcrypto/bn/arch/amd64/bn_arch.h
+++ b/src/lib/libcrypto/bn/arch/amd64/bn_arch.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_arch.h,v 1.1 2023/01/20 10:04:33 jsing Exp $ */ 1/* $OpenBSD: bn_arch.h,v 1.2 2023/01/20 17:31:52 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -20,5 +20,11 @@
20 20
21#ifndef OPENSSL_NO_ASM 21#ifndef OPENSSL_NO_ASM
22 22
23#define HAVE_BN_MUL_COMBA4
24#define HAVE_BN_MUL_COMBA8
25
26#define HAVE_BN_SQR_COMBA4
27#define HAVE_BN_SQR_COMBA8
28
23#endif 29#endif
24#endif 30#endif
diff --git a/src/lib/libcrypto/bn/arch/i386/bn_arch.h b/src/lib/libcrypto/bn/arch/i386/bn_arch.h
index 136adf0e97..17d22f3cec 100644
--- a/src/lib/libcrypto/bn/arch/i386/bn_arch.h
+++ b/src/lib/libcrypto/bn/arch/i386/bn_arch.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_arch.h,v 1.1 2023/01/20 10:04:33 jsing Exp $ */ 1/* $OpenBSD: bn_arch.h,v 1.2 2023/01/20 17:31:52 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -20,5 +20,11 @@
20 20
21#ifndef OPENSSL_NO_ASM 21#ifndef OPENSSL_NO_ASM
22 22
23#define HAVE_BN_MUL_COMBA4
24#define HAVE_BN_MUL_COMBA8
25
26#define HAVE_BN_SQR_COMBA4
27#define HAVE_BN_SQR_COMBA8
28
23#endif 29#endif
24#endif 30#endif
diff --git a/src/lib/libcrypto/bn/arch/mips64/bn_arch.h b/src/lib/libcrypto/bn/arch/mips64/bn_arch.h
index 6c6212c4a6..8e8fd1110f 100644
--- a/src/lib/libcrypto/bn/arch/mips64/bn_arch.h
+++ b/src/lib/libcrypto/bn/arch/mips64/bn_arch.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_arch.h,v 1.2 2023/01/20 10:07:52 jsing Exp $ */ 1/* $OpenBSD: bn_arch.h,v 1.3 2023/01/20 17:31:52 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -22,5 +22,11 @@
22 22
23#define HAVE_BN_DIV_3_WORDS 23#define HAVE_BN_DIV_3_WORDS
24 24
25#define HAVE_BN_MUL_COMBA4
26#define HAVE_BN_MUL_COMBA8
27
28#define HAVE_BN_SQR_COMBA4
29#define HAVE_BN_SQR_COMBA8
30
25#endif 31#endif
26#endif 32#endif
diff --git a/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h b/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h
index 4d6571f9cb..17d22f3cec 100644
--- a/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h
+++ b/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_arch.h,v 1.1 2023/01/20 10:04:34 jsing Exp $ */ 1/* $OpenBSD: bn_arch.h,v 1.2 2023/01/20 17:31:52 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -20,5 +20,11 @@
20 20
21#ifndef OPENSSL_NO_ASM 21#ifndef OPENSSL_NO_ASM
22 22
23#define HAVE_BN_MUL_COMBA4
24#define HAVE_BN_MUL_COMBA8
25
26#define HAVE_BN_SQR_COMBA4
27#define HAVE_BN_SQR_COMBA8
28
23#endif 29#endif
24#endif 30#endif
diff --git a/src/lib/libcrypto/bn/arch/sparc/bn_arch.h b/src/lib/libcrypto/bn/arch/sparc/bn_arch.h
index 4d6571f9cb..17d22f3cec 100644
--- a/src/lib/libcrypto/bn/arch/sparc/bn_arch.h
+++ b/src/lib/libcrypto/bn/arch/sparc/bn_arch.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_arch.h,v 1.1 2023/01/20 10:04:34 jsing Exp $ */ 1/* $OpenBSD: bn_arch.h,v 1.2 2023/01/20 17:31:52 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -20,5 +20,11 @@
20 20
21#ifndef OPENSSL_NO_ASM 21#ifndef OPENSSL_NO_ASM
22 22
23#define HAVE_BN_MUL_COMBA4
24#define HAVE_BN_MUL_COMBA8
25
26#define HAVE_BN_SQR_COMBA4
27#define HAVE_BN_SQR_COMBA8
28
23#endif 29#endif
24#endif 30#endif
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c
index 84063486b3..df4ddaea17 100644
--- a/src/lib/libcrypto/bn/bn_asm.c
+++ b/src/lib/libcrypto/bn/bn_asm.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_asm.c,v 1.18 2023/01/20 17:26:03 jsing Exp $ */ 1/* $OpenBSD: bn_asm.c,v 1.19 2023/01/20 17:31:52 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -479,265 +479,6 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
479 479
480#if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT) 480#if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT)
481 481
482#undef bn_mul_comba8
483#undef bn_mul_comba4
484#undef bn_sqr_comba8
485#undef bn_sqr_comba4
486
487void
488bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
489{
490 BN_ULONG c1, c2, c3;
491
492 c1 = 0;
493 c2 = 0;
494 c3 = 0;
495 mul_add_c(a[0], b[0], c1, c2, c3);
496 r[0] = c1;
497 c1 = 0;
498 mul_add_c(a[0], b[1], c2, c3, c1);
499 mul_add_c(a[1], b[0], c2, c3, c1);
500 r[1] = c2;
501 c2 = 0;
502 mul_add_c(a[2], b[0], c3, c1, c2);
503 mul_add_c(a[1], b[1], c3, c1, c2);
504 mul_add_c(a[0], b[2], c3, c1, c2);
505 r[2] = c3;
506 c3 = 0;
507 mul_add_c(a[0], b[3], c1, c2, c3);
508 mul_add_c(a[1], b[2], c1, c2, c3);
509 mul_add_c(a[2], b[1], c1, c2, c3);
510 mul_add_c(a[3], b[0], c1, c2, c3);
511 r[3] = c1;
512 c1 = 0;
513 mul_add_c(a[4], b[0], c2, c3, c1);
514 mul_add_c(a[3], b[1], c2, c3, c1);
515 mul_add_c(a[2], b[2], c2, c3, c1);
516 mul_add_c(a[1], b[3], c2, c3, c1);
517 mul_add_c(a[0], b[4], c2, c3, c1);
518 r[4] = c2;
519 c2 = 0;
520 mul_add_c(a[0], b[5], c3, c1, c2);
521 mul_add_c(a[1], b[4], c3, c1, c2);
522 mul_add_c(a[2], b[3], c3, c1, c2);
523 mul_add_c(a[3], b[2], c3, c1, c2);
524 mul_add_c(a[4], b[1], c3, c1, c2);
525 mul_add_c(a[5], b[0], c3, c1, c2);
526 r[5] = c3;
527 c3 = 0;
528 mul_add_c(a[6], b[0], c1, c2, c3);
529 mul_add_c(a[5], b[1], c1, c2, c3);
530 mul_add_c(a[4], b[2], c1, c2, c3);
531 mul_add_c(a[3], b[3], c1, c2, c3);
532 mul_add_c(a[2], b[4], c1, c2, c3);
533 mul_add_c(a[1], b[5], c1, c2, c3);
534 mul_add_c(a[0], b[6], c1, c2, c3);
535 r[6] = c1;
536 c1 = 0;
537 mul_add_c(a[0], b[7], c2, c3, c1);
538 mul_add_c(a[1], b[6], c2, c3, c1);
539 mul_add_c(a[2], b[5], c2, c3, c1);
540 mul_add_c(a[3], b[4], c2, c3, c1);
541 mul_add_c(a[4], b[3], c2, c3, c1);
542 mul_add_c(a[5], b[2], c2, c3, c1);
543 mul_add_c(a[6], b[1], c2, c3, c1);
544 mul_add_c(a[7], b[0], c2, c3, c1);
545 r[7] = c2;
546 c2 = 0;
547 mul_add_c(a[7], b[1], c3, c1, c2);
548 mul_add_c(a[6], b[2], c3, c1, c2);
549 mul_add_c(a[5], b[3], c3, c1, c2);
550 mul_add_c(a[4], b[4], c3, c1, c2);
551 mul_add_c(a[3], b[5], c3, c1, c2);
552 mul_add_c(a[2], b[6], c3, c1, c2);
553 mul_add_c(a[1], b[7], c3, c1, c2);
554 r[8] = c3;
555 c3 = 0;
556 mul_add_c(a[2], b[7], c1, c2, c3);
557 mul_add_c(a[3], b[6], c1, c2, c3);
558 mul_add_c(a[4], b[5], c1, c2, c3);
559 mul_add_c(a[5], b[4], c1, c2, c3);
560 mul_add_c(a[6], b[3], c1, c2, c3);
561 mul_add_c(a[7], b[2], c1, c2, c3);
562 r[9] = c1;
563 c1 = 0;
564 mul_add_c(a[7], b[3], c2, c3, c1);
565 mul_add_c(a[6], b[4], c2, c3, c1);
566 mul_add_c(a[5], b[5], c2, c3, c1);
567 mul_add_c(a[4], b[6], c2, c3, c1);
568 mul_add_c(a[3], b[7], c2, c3, c1);
569 r[10] = c2;
570 c2 = 0;
571 mul_add_c(a[4], b[7], c3, c1, c2);
572 mul_add_c(a[5], b[6], c3, c1, c2);
573 mul_add_c(a[6], b[5], c3, c1, c2);
574 mul_add_c(a[7], b[4], c3, c1, c2);
575 r[11] = c3;
576 c3 = 0;
577 mul_add_c(a[7], b[5], c1, c2, c3);
578 mul_add_c(a[6], b[6], c1, c2, c3);
579 mul_add_c(a[5], b[7], c1, c2, c3);
580 r[12] = c1;
581 c1 = 0;
582 mul_add_c(a[6], b[7], c2, c3, c1);
583 mul_add_c(a[7], b[6], c2, c3, c1);
584 r[13] = c2;
585 c2 = 0;
586 mul_add_c(a[7], b[7], c3, c1, c2);
587 r[14] = c3;
588 r[15] = c1;
589}
590
591void
592bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
593{
594 BN_ULONG c1, c2, c3;
595
596 c1 = 0;
597 c2 = 0;
598 c3 = 0;
599 mul_add_c(a[0], b[0], c1, c2, c3);
600 r[0] = c1;
601 c1 = 0;
602 mul_add_c(a[0], b[1], c2, c3, c1);
603 mul_add_c(a[1], b[0], c2, c3, c1);
604 r[1] = c2;
605 c2 = 0;
606 mul_add_c(a[2], b[0], c3, c1, c2);
607 mul_add_c(a[1], b[1], c3, c1, c2);
608 mul_add_c(a[0], b[2], c3, c1, c2);
609 r[2] = c3;
610 c3 = 0;
611 mul_add_c(a[0], b[3], c1, c2, c3);
612 mul_add_c(a[1], b[2], c1, c2, c3);
613 mul_add_c(a[2], b[1], c1, c2, c3);
614 mul_add_c(a[3], b[0], c1, c2, c3);
615 r[3] = c1;
616 c1 = 0;
617 mul_add_c(a[3], b[1], c2, c3, c1);
618 mul_add_c(a[2], b[2], c2, c3, c1);
619 mul_add_c(a[1], b[3], c2, c3, c1);
620 r[4] = c2;
621 c2 = 0;
622 mul_add_c(a[2], b[3], c3, c1, c2);
623 mul_add_c(a[3], b[2], c3, c1, c2);
624 r[5] = c3;
625 c3 = 0;
626 mul_add_c(a[3], b[3], c1, c2, c3);
627 r[6] = c1;
628 r[7] = c2;
629}
630
631void
632bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
633{
634 BN_ULONG c1, c2, c3;
635
636 c1 = 0;
637 c2 = 0;
638 c3 = 0;
639 sqr_add_c(a, 0, c1, c2, c3);
640 r[0] = c1;
641 c1 = 0;
642 sqr_add_c2(a, 1, 0, c2, c3, c1);
643 r[1] = c2;
644 c2 = 0;
645 sqr_add_c(a, 1, c3, c1, c2);
646 sqr_add_c2(a, 2, 0, c3, c1, c2);
647 r[2] = c3;
648 c3 = 0;
649 sqr_add_c2(a, 3, 0, c1, c2, c3);
650 sqr_add_c2(a, 2, 1, c1, c2, c3);
651 r[3] = c1;
652 c1 = 0;
653 sqr_add_c(a, 2, c2, c3, c1);
654 sqr_add_c2(a, 3, 1, c2, c3, c1);
655 sqr_add_c2(a, 4, 0, c2, c3, c1);
656 r[4] = c2;
657 c2 = 0;
658 sqr_add_c2(a, 5, 0, c3, c1, c2);
659 sqr_add_c2(a, 4, 1, c3, c1, c2);
660 sqr_add_c2(a, 3, 2, c3, c1, c2);
661 r[5] = c3;
662 c3 = 0;
663 sqr_add_c(a, 3, c1, c2, c3);
664 sqr_add_c2(a, 4, 2, c1, c2, c3);
665 sqr_add_c2(a, 5, 1, c1, c2, c3);
666 sqr_add_c2(a, 6, 0, c1, c2, c3);
667 r[6] = c1;
668 c1 = 0;
669 sqr_add_c2(a, 7, 0, c2, c3, c1);
670 sqr_add_c2(a, 6, 1, c2, c3, c1);
671 sqr_add_c2(a, 5, 2, c2, c3, c1);
672 sqr_add_c2(a, 4, 3, c2, c3, c1);
673 r[7] = c2;
674 c2 = 0;
675 sqr_add_c(a, 4, c3, c1, c2);
676 sqr_add_c2(a, 5, 3, c3, c1, c2);
677 sqr_add_c2(a, 6, 2, c3, c1, c2);
678 sqr_add_c2(a, 7, 1, c3, c1, c2);
679 r[8] = c3;
680 c3 = 0;
681 sqr_add_c2(a, 7, 2, c1, c2, c3);
682 sqr_add_c2(a, 6, 3, c1, c2, c3);
683 sqr_add_c2(a, 5, 4, c1, c2, c3);
684 r[9] = c1;
685 c1 = 0;
686 sqr_add_c(a, 5, c2, c3, c1);
687 sqr_add_c2(a, 6, 4, c2, c3, c1);
688 sqr_add_c2(a, 7, 3, c2, c3, c1);
689 r[10] = c2;
690 c2 = 0;
691 sqr_add_c2(a, 7, 4, c3, c1, c2);
692 sqr_add_c2(a, 6, 5, c3, c1, c2);
693 r[11] = c3;
694 c3 = 0;
695 sqr_add_c(a, 6, c1, c2, c3);
696 sqr_add_c2(a, 7, 5, c1, c2, c3);
697 r[12] = c1;
698 c1 = 0;
699 sqr_add_c2(a, 7, 6, c2, c3, c1);
700 r[13] = c2;
701 c2 = 0;
702 sqr_add_c(a, 7, c3, c1, c2);
703 r[14] = c3;
704 r[15] = c1;
705}
706
707void
708bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
709{
710 BN_ULONG c1, c2, c3;
711
712 c1 = 0;
713 c2 = 0;
714 c3 = 0;
715 sqr_add_c(a, 0, c1, c2, c3);
716 r[0] = c1;
717 c1 = 0;
718 sqr_add_c2(a, 1, 0, c2, c3, c1);
719 r[1] = c2;
720 c2 = 0;
721 sqr_add_c(a, 1, c3, c1, c2);
722 sqr_add_c2(a, 2, 0, c3, c1, c2);
723 r[2] = c3;
724 c3 = 0;
725 sqr_add_c2(a, 3, 0, c1, c2, c3);
726 sqr_add_c2(a, 2, 1, c1, c2, c3);
727 r[3] = c1;
728 c1 = 0;
729 sqr_add_c(a, 2, c2, c3, c1);
730 sqr_add_c2(a, 3, 1, c2, c3, c1);
731 r[4] = c2;
732 c2 = 0;
733 sqr_add_c2(a, 3, 2, c3, c1, c2);
734 r[5] = c3;
735 c3 = 0;
736 sqr_add_c(a, 3, c1, c2, c3);
737 r[6] = c1;
738 r[7] = c2;
739}
740
741#ifdef OPENSSL_NO_ASM 482#ifdef OPENSSL_NO_ASM
742#ifdef OPENSSL_BN_ASM_MONT 483#ifdef OPENSSL_BN_ASM_MONT
743/* 484/*
@@ -853,45 +594,6 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U
853 594
854#else /* !BN_MUL_COMBA */ 595#else /* !BN_MUL_COMBA */
855 596
856/* hmm... is it faster just to do a multiply? */
857#undef bn_sqr_comba4
858void
859bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
860{
861 BN_ULONG t[8];
862 bn_sqr_normal(r, a, 4, t);
863}
864
865#undef bn_sqr_comba8
866void
867bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
868{
869 BN_ULONG t[16];
870 bn_sqr_normal(r, a, 8, t);
871}
872
873void
874bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
875{
876 r[4] = bn_mul_words(&(r[0]), a, 4, b[0]);
877 r[5] = bn_mul_add_words(&(r[1]), a, 4, b[1]);
878 r[6] = bn_mul_add_words(&(r[2]), a, 4, b[2]);
879 r[7] = bn_mul_add_words(&(r[3]), a, 4, b[3]);
880}
881
882void
883bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
884{
885 r[8] = bn_mul_words(&(r[0]), a, 8, b[0]);
886 r[9] = bn_mul_add_words(&(r[1]), a, 8, b[1]);
887 r[10] = bn_mul_add_words(&(r[2]), a, 8, b[2]);
888 r[11] = bn_mul_add_words(&(r[3]), a, 8, b[3]);
889 r[12] = bn_mul_add_words(&(r[4]), a, 8, b[4]);
890 r[13] = bn_mul_add_words(&(r[5]), a, 8, b[5]);
891 r[14] = bn_mul_add_words(&(r[6]), a, 8, b[6]);
892 r[15] = bn_mul_add_words(&(r[7]), a, 8, b[7]);
893}
894
895#ifdef OPENSSL_NO_ASM 597#ifdef OPENSSL_NO_ASM
896#ifdef OPENSSL_BN_ASM_MONT 598#ifdef OPENSSL_BN_ASM_MONT
897int 599int
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c
index b7a7f8bcef..3a69ef35da 100644
--- a/src/lib/libcrypto/bn/bn_mul.c
+++ b/src/lib/libcrypto/bn/bn_mul.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_mul.c,v 1.27 2023/01/20 12:16:46 jsing Exp $ */ 1/* $OpenBSD: bn_mul.c,v 1.28 2023/01/20 17:31:52 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -62,8 +62,157 @@
62 62
63#include <openssl/opensslconf.h> 63#include <openssl/opensslconf.h>
64 64
65#include "bn_arch.h"
65#include "bn_local.h" 66#include "bn_local.h"
66 67
68#ifndef HAVE_BN_MUL_COMBA4
69void
70bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
71{
72 BN_ULONG c1, c2, c3;
73
74 c1 = 0;
75 c2 = 0;
76 c3 = 0;
77 mul_add_c(a[0], b[0], c1, c2, c3);
78 r[0] = c1;
79 c1 = 0;
80 mul_add_c(a[0], b[1], c2, c3, c1);
81 mul_add_c(a[1], b[0], c2, c3, c1);
82 r[1] = c2;
83 c2 = 0;
84 mul_add_c(a[2], b[0], c3, c1, c2);
85 mul_add_c(a[1], b[1], c3, c1, c2);
86 mul_add_c(a[0], b[2], c3, c1, c2);
87 r[2] = c3;
88 c3 = 0;
89 mul_add_c(a[0], b[3], c1, c2, c3);
90 mul_add_c(a[1], b[2], c1, c2, c3);
91 mul_add_c(a[2], b[1], c1, c2, c3);
92 mul_add_c(a[3], b[0], c1, c2, c3);
93 r[3] = c1;
94 c1 = 0;
95 mul_add_c(a[3], b[1], c2, c3, c1);
96 mul_add_c(a[2], b[2], c2, c3, c1);
97 mul_add_c(a[1], b[3], c2, c3, c1);
98 r[4] = c2;
99 c2 = 0;
100 mul_add_c(a[2], b[3], c3, c1, c2);
101 mul_add_c(a[3], b[2], c3, c1, c2);
102 r[5] = c3;
103 c3 = 0;
104 mul_add_c(a[3], b[3], c1, c2, c3);
105 r[6] = c1;
106 r[7] = c2;
107}
108#endif
109
110#ifndef HAVE_BN_MUL_COMBA8
111void
112bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
113{
114 BN_ULONG c1, c2, c3;
115
116 c1 = 0;
117 c2 = 0;
118 c3 = 0;
119 mul_add_c(a[0], b[0], c1, c2, c3);
120 r[0] = c1;
121 c1 = 0;
122 mul_add_c(a[0], b[1], c2, c3, c1);
123 mul_add_c(a[1], b[0], c2, c3, c1);
124 r[1] = c2;
125 c2 = 0;
126 mul_add_c(a[2], b[0], c3, c1, c2);
127 mul_add_c(a[1], b[1], c3, c1, c2);
128 mul_add_c(a[0], b[2], c3, c1, c2);
129 r[2] = c3;
130 c3 = 0;
131 mul_add_c(a[0], b[3], c1, c2, c3);
132 mul_add_c(a[1], b[2], c1, c2, c3);
133 mul_add_c(a[2], b[1], c1, c2, c3);
134 mul_add_c(a[3], b[0], c1, c2, c3);
135 r[3] = c1;
136 c1 = 0;
137 mul_add_c(a[4], b[0], c2, c3, c1);
138 mul_add_c(a[3], b[1], c2, c3, c1);
139 mul_add_c(a[2], b[2], c2, c3, c1);
140 mul_add_c(a[1], b[3], c2, c3, c1);
141 mul_add_c(a[0], b[4], c2, c3, c1);
142 r[4] = c2;
143 c2 = 0;
144 mul_add_c(a[0], b[5], c3, c1, c2);
145 mul_add_c(a[1], b[4], c3, c1, c2);
146 mul_add_c(a[2], b[3], c3, c1, c2);
147 mul_add_c(a[3], b[2], c3, c1, c2);
148 mul_add_c(a[4], b[1], c3, c1, c2);
149 mul_add_c(a[5], b[0], c3, c1, c2);
150 r[5] = c3;
151 c3 = 0;
152 mul_add_c(a[6], b[0], c1, c2, c3);
153 mul_add_c(a[5], b[1], c1, c2, c3);
154 mul_add_c(a[4], b[2], c1, c2, c3);
155 mul_add_c(a[3], b[3], c1, c2, c3);
156 mul_add_c(a[2], b[4], c1, c2, c3);
157 mul_add_c(a[1], b[5], c1, c2, c3);
158 mul_add_c(a[0], b[6], c1, c2, c3);
159 r[6] = c1;
160 c1 = 0;
161 mul_add_c(a[0], b[7], c2, c3, c1);
162 mul_add_c(a[1], b[6], c2, c3, c1);
163 mul_add_c(a[2], b[5], c2, c3, c1);
164 mul_add_c(a[3], b[4], c2, c3, c1);
165 mul_add_c(a[4], b[3], c2, c3, c1);
166 mul_add_c(a[5], b[2], c2, c3, c1);
167 mul_add_c(a[6], b[1], c2, c3, c1);
168 mul_add_c(a[7], b[0], c2, c3, c1);
169 r[7] = c2;
170 c2 = 0;
171 mul_add_c(a[7], b[1], c3, c1, c2);
172 mul_add_c(a[6], b[2], c3, c1, c2);
173 mul_add_c(a[5], b[3], c3, c1, c2);
174 mul_add_c(a[4], b[4], c3, c1, c2);
175 mul_add_c(a[3], b[5], c3, c1, c2);
176 mul_add_c(a[2], b[6], c3, c1, c2);
177 mul_add_c(a[1], b[7], c3, c1, c2);
178 r[8] = c3;
179 c3 = 0;
180 mul_add_c(a[2], b[7], c1, c2, c3);
181 mul_add_c(a[3], b[6], c1, c2, c3);
182 mul_add_c(a[4], b[5], c1, c2, c3);
183 mul_add_c(a[5], b[4], c1, c2, c3);
184 mul_add_c(a[6], b[3], c1, c2, c3);
185 mul_add_c(a[7], b[2], c1, c2, c3);
186 r[9] = c1;
187 c1 = 0;
188 mul_add_c(a[7], b[3], c2, c3, c1);
189 mul_add_c(a[6], b[4], c2, c3, c1);
190 mul_add_c(a[5], b[5], c2, c3, c1);
191 mul_add_c(a[4], b[6], c2, c3, c1);
192 mul_add_c(a[3], b[7], c2, c3, c1);
193 r[10] = c2;
194 c2 = 0;
195 mul_add_c(a[4], b[7], c3, c1, c2);
196 mul_add_c(a[5], b[6], c3, c1, c2);
197 mul_add_c(a[6], b[5], c3, c1, c2);
198 mul_add_c(a[7], b[4], c3, c1, c2);
199 r[11] = c3;
200 c3 = 0;
201 mul_add_c(a[7], b[5], c1, c2, c3);
202 mul_add_c(a[6], b[6], c1, c2, c3);
203 mul_add_c(a[5], b[7], c1, c2, c3);
204 r[12] = c1;
205 c1 = 0;
206 mul_add_c(a[6], b[7], c2, c3, c1);
207 mul_add_c(a[7], b[6], c2, c3, c1);
208 r[13] = c2;
209 c2 = 0;
210 mul_add_c(a[7], b[7], c3, c1, c2);
211 r[14] = c3;
212 r[15] = c1;
213}
214#endif
215
67#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS) 216#if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
68/* 217/*
69 * Here follows a specialised variant of bn_sub_words(), which has the property 218 * Here follows a specialised variant of bn_sub_words(), which has the property
diff --git a/src/lib/libcrypto/bn/bn_sqr.c b/src/lib/libcrypto/bn/bn_sqr.c
index 56ea378527..02b87556d4 100644
--- a/src/lib/libcrypto/bn/bn_sqr.c
+++ b/src/lib/libcrypto/bn/bn_sqr.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_sqr.c,v 1.18 2023/01/16 17:56:25 jsing Exp $ */ 1/* $OpenBSD: bn_sqr.c,v 1.19 2023/01/20 17:31:52 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -59,8 +59,123 @@
59#include <stdio.h> 59#include <stdio.h>
60#include <string.h> 60#include <string.h>
61 61
62#include "bn_arch.h"
62#include "bn_local.h" 63#include "bn_local.h"
63 64
65#ifndef HAVE_BN_SQR_COMBA4
66void
67bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
68{
69 BN_ULONG c1, c2, c3;
70
71 c1 = 0;
72 c2 = 0;
73 c3 = 0;
74 sqr_add_c(a, 0, c1, c2, c3);
75 r[0] = c1;
76 c1 = 0;
77 sqr_add_c2(a, 1, 0, c2, c3, c1);
78 r[1] = c2;
79 c2 = 0;
80 sqr_add_c(a, 1, c3, c1, c2);
81 sqr_add_c2(a, 2, 0, c3, c1, c2);
82 r[2] = c3;
83 c3 = 0;
84 sqr_add_c2(a, 3, 0, c1, c2, c3);
85 sqr_add_c2(a, 2, 1, c1, c2, c3);
86 r[3] = c1;
87 c1 = 0;
88 sqr_add_c(a, 2, c2, c3, c1);
89 sqr_add_c2(a, 3, 1, c2, c3, c1);
90 r[4] = c2;
91 c2 = 0;
92 sqr_add_c2(a, 3, 2, c3, c1, c2);
93 r[5] = c3;
94 c3 = 0;
95 sqr_add_c(a, 3, c1, c2, c3);
96 r[6] = c1;
97 r[7] = c2;
98}
99#endif
100
101#ifndef HAVE_BN_SQR_COMBA8
102void
103bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
104{
105 BN_ULONG c1, c2, c3;
106
107 c1 = 0;
108 c2 = 0;
109 c3 = 0;
110 sqr_add_c(a, 0, c1, c2, c3);
111 r[0] = c1;
112 c1 = 0;
113 sqr_add_c2(a, 1, 0, c2, c3, c1);
114 r[1] = c2;
115 c2 = 0;
116 sqr_add_c(a, 1, c3, c1, c2);
117 sqr_add_c2(a, 2, 0, c3, c1, c2);
118 r[2] = c3;
119 c3 = 0;
120 sqr_add_c2(a, 3, 0, c1, c2, c3);
121 sqr_add_c2(a, 2, 1, c1, c2, c3);
122 r[3] = c1;
123 c1 = 0;
124 sqr_add_c(a, 2, c2, c3, c1);
125 sqr_add_c2(a, 3, 1, c2, c3, c1);
126 sqr_add_c2(a, 4, 0, c2, c3, c1);
127 r[4] = c2;
128 c2 = 0;
129 sqr_add_c2(a, 5, 0, c3, c1, c2);
130 sqr_add_c2(a, 4, 1, c3, c1, c2);
131 sqr_add_c2(a, 3, 2, c3, c1, c2);
132 r[5] = c3;
133 c3 = 0;
134 sqr_add_c(a, 3, c1, c2, c3);
135 sqr_add_c2(a, 4, 2, c1, c2, c3);
136 sqr_add_c2(a, 5, 1, c1, c2, c3);
137 sqr_add_c2(a, 6, 0, c1, c2, c3);
138 r[6] = c1;
139 c1 = 0;
140 sqr_add_c2(a, 7, 0, c2, c3, c1);
141 sqr_add_c2(a, 6, 1, c2, c3, c1);
142 sqr_add_c2(a, 5, 2, c2, c3, c1);
143 sqr_add_c2(a, 4, 3, c2, c3, c1);
144 r[7] = c2;
145 c2 = 0;
146 sqr_add_c(a, 4, c3, c1, c2);
147 sqr_add_c2(a, 5, 3, c3, c1, c2);
148 sqr_add_c2(a, 6, 2, c3, c1, c2);
149 sqr_add_c2(a, 7, 1, c3, c1, c2);
150 r[8] = c3;
151 c3 = 0;
152 sqr_add_c2(a, 7, 2, c1, c2, c3);
153 sqr_add_c2(a, 6, 3, c1, c2, c3);
154 sqr_add_c2(a, 5, 4, c1, c2, c3);
155 r[9] = c1;
156 c1 = 0;
157 sqr_add_c(a, 5, c2, c3, c1);
158 sqr_add_c2(a, 6, 4, c2, c3, c1);
159 sqr_add_c2(a, 7, 3, c2, c3, c1);
160 r[10] = c2;
161 c2 = 0;
162 sqr_add_c2(a, 7, 4, c3, c1, c2);
163 sqr_add_c2(a, 6, 5, c3, c1, c2);
164 r[11] = c3;
165 c3 = 0;
166 sqr_add_c(a, 6, c1, c2, c3);
167 sqr_add_c2(a, 7, 5, c1, c2, c3);
168 r[12] = c1;
169 c1 = 0;
170 sqr_add_c2(a, 7, 6, c2, c3, c1);
171 r[13] = c2;
172 c2 = 0;
173 sqr_add_c(a, 7, c3, c1, c2);
174 r[14] = c3;
175 r[15] = c1;
176}
177#endif
178
64/* tmp must have 2*n words */ 179/* tmp must have 2*n words */
65void 180void
66bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp) 181bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp)