diff options
author | jsing <> | 2023-01-20 17:31:52 +0000 |
---|---|---|
committer | jsing <> | 2023-01-20 17:31:52 +0000 |
commit | ec907bb8e44028294d6c2a6faf9c735ce8012e48 (patch) | |
tree | b7a2361e00d87650a48e90b0530c6a86d27e0039 /src | |
parent | a50b434b87829ee0d12767c21ae98194684ab720 (diff) | |
download | openbsd-ec907bb8e44028294d6c2a6faf9c735ce8012e48.tar.gz openbsd-ec907bb8e44028294d6c2a6faf9c735ce8012e48.tar.bz2 openbsd-ec907bb8e44028294d6c2a6faf9c735ce8012e48.zip |
Move bn_{mul,sqr}_comba{4,8}() from bn_asm.c to bn_mul.c/bn_sqr.c.
Wrap these in HAVE_BN_{MUL,SQR}_COMBA{4,8} defines. Add these defines to
bn_arch.h where the architecture currently provides its own version.
ok tb@
Diffstat (limited to 'src')
-rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bn_arch.h | 8 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/arch/i386/bn_arch.h | 8 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/arch/mips64/bn_arch.h | 8 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/arch/powerpc/bn_arch.h | 8 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/arch/sparc/bn_arch.h | 8 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_asm.c | 300 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_mul.c | 151 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_sqr.c | 117 |
8 files changed, 302 insertions, 306 deletions
diff --git a/src/lib/libcrypto/bn/arch/amd64/bn_arch.h b/src/lib/libcrypto/bn/arch/amd64/bn_arch.h index 136adf0e97..17d22f3cec 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bn_arch.h +++ b/src/lib/libcrypto/bn/arch/amd64/bn_arch.h | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_arch.h,v 1.1 2023/01/20 10:04:33 jsing Exp $ */ | 1 | /* $OpenBSD: bn_arch.h,v 1.2 2023/01/20 17:31:52 jsing Exp $ */ |
2 | /* | 2 | /* |
3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
4 | * | 4 | * |
@@ -20,5 +20,11 @@ | |||
20 | 20 | ||
21 | #ifndef OPENSSL_NO_ASM | 21 | #ifndef OPENSSL_NO_ASM |
22 | 22 | ||
23 | #define HAVE_BN_MUL_COMBA4 | ||
24 | #define HAVE_BN_MUL_COMBA8 | ||
25 | |||
26 | #define HAVE_BN_SQR_COMBA4 | ||
27 | #define HAVE_BN_SQR_COMBA8 | ||
28 | |||
23 | #endif | 29 | #endif |
24 | #endif | 30 | #endif |
diff --git a/src/lib/libcrypto/bn/arch/i386/bn_arch.h b/src/lib/libcrypto/bn/arch/i386/bn_arch.h index 136adf0e97..17d22f3cec 100644 --- a/src/lib/libcrypto/bn/arch/i386/bn_arch.h +++ b/src/lib/libcrypto/bn/arch/i386/bn_arch.h | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_arch.h,v 1.1 2023/01/20 10:04:33 jsing Exp $ */ | 1 | /* $OpenBSD: bn_arch.h,v 1.2 2023/01/20 17:31:52 jsing Exp $ */ |
2 | /* | 2 | /* |
3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
4 | * | 4 | * |
@@ -20,5 +20,11 @@ | |||
20 | 20 | ||
21 | #ifndef OPENSSL_NO_ASM | 21 | #ifndef OPENSSL_NO_ASM |
22 | 22 | ||
23 | #define HAVE_BN_MUL_COMBA4 | ||
24 | #define HAVE_BN_MUL_COMBA8 | ||
25 | |||
26 | #define HAVE_BN_SQR_COMBA4 | ||
27 | #define HAVE_BN_SQR_COMBA8 | ||
28 | |||
23 | #endif | 29 | #endif |
24 | #endif | 30 | #endif |
diff --git a/src/lib/libcrypto/bn/arch/mips64/bn_arch.h b/src/lib/libcrypto/bn/arch/mips64/bn_arch.h index 6c6212c4a6..8e8fd1110f 100644 --- a/src/lib/libcrypto/bn/arch/mips64/bn_arch.h +++ b/src/lib/libcrypto/bn/arch/mips64/bn_arch.h | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_arch.h,v 1.2 2023/01/20 10:07:52 jsing Exp $ */ | 1 | /* $OpenBSD: bn_arch.h,v 1.3 2023/01/20 17:31:52 jsing Exp $ */ |
2 | /* | 2 | /* |
3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
4 | * | 4 | * |
@@ -22,5 +22,11 @@ | |||
22 | 22 | ||
23 | #define HAVE_BN_DIV_3_WORDS | 23 | #define HAVE_BN_DIV_3_WORDS |
24 | 24 | ||
25 | #define HAVE_BN_MUL_COMBA4 | ||
26 | #define HAVE_BN_MUL_COMBA8 | ||
27 | |||
28 | #define HAVE_BN_SQR_COMBA4 | ||
29 | #define HAVE_BN_SQR_COMBA8 | ||
30 | |||
25 | #endif | 31 | #endif |
26 | #endif | 32 | #endif |
diff --git a/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h b/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h index 4d6571f9cb..17d22f3cec 100644 --- a/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h +++ b/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_arch.h,v 1.1 2023/01/20 10:04:34 jsing Exp $ */ | 1 | /* $OpenBSD: bn_arch.h,v 1.2 2023/01/20 17:31:52 jsing Exp $ */ |
2 | /* | 2 | /* |
3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
4 | * | 4 | * |
@@ -20,5 +20,11 @@ | |||
20 | 20 | ||
21 | #ifndef OPENSSL_NO_ASM | 21 | #ifndef OPENSSL_NO_ASM |
22 | 22 | ||
23 | #define HAVE_BN_MUL_COMBA4 | ||
24 | #define HAVE_BN_MUL_COMBA8 | ||
25 | |||
26 | #define HAVE_BN_SQR_COMBA4 | ||
27 | #define HAVE_BN_SQR_COMBA8 | ||
28 | |||
23 | #endif | 29 | #endif |
24 | #endif | 30 | #endif |
diff --git a/src/lib/libcrypto/bn/arch/sparc/bn_arch.h b/src/lib/libcrypto/bn/arch/sparc/bn_arch.h index 4d6571f9cb..17d22f3cec 100644 --- a/src/lib/libcrypto/bn/arch/sparc/bn_arch.h +++ b/src/lib/libcrypto/bn/arch/sparc/bn_arch.h | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_arch.h,v 1.1 2023/01/20 10:04:34 jsing Exp $ */ | 1 | /* $OpenBSD: bn_arch.h,v 1.2 2023/01/20 17:31:52 jsing Exp $ */ |
2 | /* | 2 | /* |
3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
4 | * | 4 | * |
@@ -20,5 +20,11 @@ | |||
20 | 20 | ||
21 | #ifndef OPENSSL_NO_ASM | 21 | #ifndef OPENSSL_NO_ASM |
22 | 22 | ||
23 | #define HAVE_BN_MUL_COMBA4 | ||
24 | #define HAVE_BN_MUL_COMBA8 | ||
25 | |||
26 | #define HAVE_BN_SQR_COMBA4 | ||
27 | #define HAVE_BN_SQR_COMBA8 | ||
28 | |||
23 | #endif | 29 | #endif |
24 | #endif | 30 | #endif |
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c index 84063486b3..df4ddaea17 100644 --- a/src/lib/libcrypto/bn/bn_asm.c +++ b/src/lib/libcrypto/bn/bn_asm.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_asm.c,v 1.18 2023/01/20 17:26:03 jsing Exp $ */ | 1 | /* $OpenBSD: bn_asm.c,v 1.19 2023/01/20 17:31:52 jsing Exp $ */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. | 3 | * All rights reserved. |
4 | * | 4 | * |
@@ -479,265 +479,6 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | |||
479 | 479 | ||
480 | #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT) | 480 | #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT) |
481 | 481 | ||
482 | #undef bn_mul_comba8 | ||
483 | #undef bn_mul_comba4 | ||
484 | #undef bn_sqr_comba8 | ||
485 | #undef bn_sqr_comba4 | ||
486 | |||
487 | void | ||
488 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
489 | { | ||
490 | BN_ULONG c1, c2, c3; | ||
491 | |||
492 | c1 = 0; | ||
493 | c2 = 0; | ||
494 | c3 = 0; | ||
495 | mul_add_c(a[0], b[0], c1, c2, c3); | ||
496 | r[0] = c1; | ||
497 | c1 = 0; | ||
498 | mul_add_c(a[0], b[1], c2, c3, c1); | ||
499 | mul_add_c(a[1], b[0], c2, c3, c1); | ||
500 | r[1] = c2; | ||
501 | c2 = 0; | ||
502 | mul_add_c(a[2], b[0], c3, c1, c2); | ||
503 | mul_add_c(a[1], b[1], c3, c1, c2); | ||
504 | mul_add_c(a[0], b[2], c3, c1, c2); | ||
505 | r[2] = c3; | ||
506 | c3 = 0; | ||
507 | mul_add_c(a[0], b[3], c1, c2, c3); | ||
508 | mul_add_c(a[1], b[2], c1, c2, c3); | ||
509 | mul_add_c(a[2], b[1], c1, c2, c3); | ||
510 | mul_add_c(a[3], b[0], c1, c2, c3); | ||
511 | r[3] = c1; | ||
512 | c1 = 0; | ||
513 | mul_add_c(a[4], b[0], c2, c3, c1); | ||
514 | mul_add_c(a[3], b[1], c2, c3, c1); | ||
515 | mul_add_c(a[2], b[2], c2, c3, c1); | ||
516 | mul_add_c(a[1], b[3], c2, c3, c1); | ||
517 | mul_add_c(a[0], b[4], c2, c3, c1); | ||
518 | r[4] = c2; | ||
519 | c2 = 0; | ||
520 | mul_add_c(a[0], b[5], c3, c1, c2); | ||
521 | mul_add_c(a[1], b[4], c3, c1, c2); | ||
522 | mul_add_c(a[2], b[3], c3, c1, c2); | ||
523 | mul_add_c(a[3], b[2], c3, c1, c2); | ||
524 | mul_add_c(a[4], b[1], c3, c1, c2); | ||
525 | mul_add_c(a[5], b[0], c3, c1, c2); | ||
526 | r[5] = c3; | ||
527 | c3 = 0; | ||
528 | mul_add_c(a[6], b[0], c1, c2, c3); | ||
529 | mul_add_c(a[5], b[1], c1, c2, c3); | ||
530 | mul_add_c(a[4], b[2], c1, c2, c3); | ||
531 | mul_add_c(a[3], b[3], c1, c2, c3); | ||
532 | mul_add_c(a[2], b[4], c1, c2, c3); | ||
533 | mul_add_c(a[1], b[5], c1, c2, c3); | ||
534 | mul_add_c(a[0], b[6], c1, c2, c3); | ||
535 | r[6] = c1; | ||
536 | c1 = 0; | ||
537 | mul_add_c(a[0], b[7], c2, c3, c1); | ||
538 | mul_add_c(a[1], b[6], c2, c3, c1); | ||
539 | mul_add_c(a[2], b[5], c2, c3, c1); | ||
540 | mul_add_c(a[3], b[4], c2, c3, c1); | ||
541 | mul_add_c(a[4], b[3], c2, c3, c1); | ||
542 | mul_add_c(a[5], b[2], c2, c3, c1); | ||
543 | mul_add_c(a[6], b[1], c2, c3, c1); | ||
544 | mul_add_c(a[7], b[0], c2, c3, c1); | ||
545 | r[7] = c2; | ||
546 | c2 = 0; | ||
547 | mul_add_c(a[7], b[1], c3, c1, c2); | ||
548 | mul_add_c(a[6], b[2], c3, c1, c2); | ||
549 | mul_add_c(a[5], b[3], c3, c1, c2); | ||
550 | mul_add_c(a[4], b[4], c3, c1, c2); | ||
551 | mul_add_c(a[3], b[5], c3, c1, c2); | ||
552 | mul_add_c(a[2], b[6], c3, c1, c2); | ||
553 | mul_add_c(a[1], b[7], c3, c1, c2); | ||
554 | r[8] = c3; | ||
555 | c3 = 0; | ||
556 | mul_add_c(a[2], b[7], c1, c2, c3); | ||
557 | mul_add_c(a[3], b[6], c1, c2, c3); | ||
558 | mul_add_c(a[4], b[5], c1, c2, c3); | ||
559 | mul_add_c(a[5], b[4], c1, c2, c3); | ||
560 | mul_add_c(a[6], b[3], c1, c2, c3); | ||
561 | mul_add_c(a[7], b[2], c1, c2, c3); | ||
562 | r[9] = c1; | ||
563 | c1 = 0; | ||
564 | mul_add_c(a[7], b[3], c2, c3, c1); | ||
565 | mul_add_c(a[6], b[4], c2, c3, c1); | ||
566 | mul_add_c(a[5], b[5], c2, c3, c1); | ||
567 | mul_add_c(a[4], b[6], c2, c3, c1); | ||
568 | mul_add_c(a[3], b[7], c2, c3, c1); | ||
569 | r[10] = c2; | ||
570 | c2 = 0; | ||
571 | mul_add_c(a[4], b[7], c3, c1, c2); | ||
572 | mul_add_c(a[5], b[6], c3, c1, c2); | ||
573 | mul_add_c(a[6], b[5], c3, c1, c2); | ||
574 | mul_add_c(a[7], b[4], c3, c1, c2); | ||
575 | r[11] = c3; | ||
576 | c3 = 0; | ||
577 | mul_add_c(a[7], b[5], c1, c2, c3); | ||
578 | mul_add_c(a[6], b[6], c1, c2, c3); | ||
579 | mul_add_c(a[5], b[7], c1, c2, c3); | ||
580 | r[12] = c1; | ||
581 | c1 = 0; | ||
582 | mul_add_c(a[6], b[7], c2, c3, c1); | ||
583 | mul_add_c(a[7], b[6], c2, c3, c1); | ||
584 | r[13] = c2; | ||
585 | c2 = 0; | ||
586 | mul_add_c(a[7], b[7], c3, c1, c2); | ||
587 | r[14] = c3; | ||
588 | r[15] = c1; | ||
589 | } | ||
590 | |||
591 | void | ||
592 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
593 | { | ||
594 | BN_ULONG c1, c2, c3; | ||
595 | |||
596 | c1 = 0; | ||
597 | c2 = 0; | ||
598 | c3 = 0; | ||
599 | mul_add_c(a[0], b[0], c1, c2, c3); | ||
600 | r[0] = c1; | ||
601 | c1 = 0; | ||
602 | mul_add_c(a[0], b[1], c2, c3, c1); | ||
603 | mul_add_c(a[1], b[0], c2, c3, c1); | ||
604 | r[1] = c2; | ||
605 | c2 = 0; | ||
606 | mul_add_c(a[2], b[0], c3, c1, c2); | ||
607 | mul_add_c(a[1], b[1], c3, c1, c2); | ||
608 | mul_add_c(a[0], b[2], c3, c1, c2); | ||
609 | r[2] = c3; | ||
610 | c3 = 0; | ||
611 | mul_add_c(a[0], b[3], c1, c2, c3); | ||
612 | mul_add_c(a[1], b[2], c1, c2, c3); | ||
613 | mul_add_c(a[2], b[1], c1, c2, c3); | ||
614 | mul_add_c(a[3], b[0], c1, c2, c3); | ||
615 | r[3] = c1; | ||
616 | c1 = 0; | ||
617 | mul_add_c(a[3], b[1], c2, c3, c1); | ||
618 | mul_add_c(a[2], b[2], c2, c3, c1); | ||
619 | mul_add_c(a[1], b[3], c2, c3, c1); | ||
620 | r[4] = c2; | ||
621 | c2 = 0; | ||
622 | mul_add_c(a[2], b[3], c3, c1, c2); | ||
623 | mul_add_c(a[3], b[2], c3, c1, c2); | ||
624 | r[5] = c3; | ||
625 | c3 = 0; | ||
626 | mul_add_c(a[3], b[3], c1, c2, c3); | ||
627 | r[6] = c1; | ||
628 | r[7] = c2; | ||
629 | } | ||
630 | |||
631 | void | ||
632 | bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | ||
633 | { | ||
634 | BN_ULONG c1, c2, c3; | ||
635 | |||
636 | c1 = 0; | ||
637 | c2 = 0; | ||
638 | c3 = 0; | ||
639 | sqr_add_c(a, 0, c1, c2, c3); | ||
640 | r[0] = c1; | ||
641 | c1 = 0; | ||
642 | sqr_add_c2(a, 1, 0, c2, c3, c1); | ||
643 | r[1] = c2; | ||
644 | c2 = 0; | ||
645 | sqr_add_c(a, 1, c3, c1, c2); | ||
646 | sqr_add_c2(a, 2, 0, c3, c1, c2); | ||
647 | r[2] = c3; | ||
648 | c3 = 0; | ||
649 | sqr_add_c2(a, 3, 0, c1, c2, c3); | ||
650 | sqr_add_c2(a, 2, 1, c1, c2, c3); | ||
651 | r[3] = c1; | ||
652 | c1 = 0; | ||
653 | sqr_add_c(a, 2, c2, c3, c1); | ||
654 | sqr_add_c2(a, 3, 1, c2, c3, c1); | ||
655 | sqr_add_c2(a, 4, 0, c2, c3, c1); | ||
656 | r[4] = c2; | ||
657 | c2 = 0; | ||
658 | sqr_add_c2(a, 5, 0, c3, c1, c2); | ||
659 | sqr_add_c2(a, 4, 1, c3, c1, c2); | ||
660 | sqr_add_c2(a, 3, 2, c3, c1, c2); | ||
661 | r[5] = c3; | ||
662 | c3 = 0; | ||
663 | sqr_add_c(a, 3, c1, c2, c3); | ||
664 | sqr_add_c2(a, 4, 2, c1, c2, c3); | ||
665 | sqr_add_c2(a, 5, 1, c1, c2, c3); | ||
666 | sqr_add_c2(a, 6, 0, c1, c2, c3); | ||
667 | r[6] = c1; | ||
668 | c1 = 0; | ||
669 | sqr_add_c2(a, 7, 0, c2, c3, c1); | ||
670 | sqr_add_c2(a, 6, 1, c2, c3, c1); | ||
671 | sqr_add_c2(a, 5, 2, c2, c3, c1); | ||
672 | sqr_add_c2(a, 4, 3, c2, c3, c1); | ||
673 | r[7] = c2; | ||
674 | c2 = 0; | ||
675 | sqr_add_c(a, 4, c3, c1, c2); | ||
676 | sqr_add_c2(a, 5, 3, c3, c1, c2); | ||
677 | sqr_add_c2(a, 6, 2, c3, c1, c2); | ||
678 | sqr_add_c2(a, 7, 1, c3, c1, c2); | ||
679 | r[8] = c3; | ||
680 | c3 = 0; | ||
681 | sqr_add_c2(a, 7, 2, c1, c2, c3); | ||
682 | sqr_add_c2(a, 6, 3, c1, c2, c3); | ||
683 | sqr_add_c2(a, 5, 4, c1, c2, c3); | ||
684 | r[9] = c1; | ||
685 | c1 = 0; | ||
686 | sqr_add_c(a, 5, c2, c3, c1); | ||
687 | sqr_add_c2(a, 6, 4, c2, c3, c1); | ||
688 | sqr_add_c2(a, 7, 3, c2, c3, c1); | ||
689 | r[10] = c2; | ||
690 | c2 = 0; | ||
691 | sqr_add_c2(a, 7, 4, c3, c1, c2); | ||
692 | sqr_add_c2(a, 6, 5, c3, c1, c2); | ||
693 | r[11] = c3; | ||
694 | c3 = 0; | ||
695 | sqr_add_c(a, 6, c1, c2, c3); | ||
696 | sqr_add_c2(a, 7, 5, c1, c2, c3); | ||
697 | r[12] = c1; | ||
698 | c1 = 0; | ||
699 | sqr_add_c2(a, 7, 6, c2, c3, c1); | ||
700 | r[13] = c2; | ||
701 | c2 = 0; | ||
702 | sqr_add_c(a, 7, c3, c1, c2); | ||
703 | r[14] = c3; | ||
704 | r[15] = c1; | ||
705 | } | ||
706 | |||
707 | void | ||
708 | bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | ||
709 | { | ||
710 | BN_ULONG c1, c2, c3; | ||
711 | |||
712 | c1 = 0; | ||
713 | c2 = 0; | ||
714 | c3 = 0; | ||
715 | sqr_add_c(a, 0, c1, c2, c3); | ||
716 | r[0] = c1; | ||
717 | c1 = 0; | ||
718 | sqr_add_c2(a, 1, 0, c2, c3, c1); | ||
719 | r[1] = c2; | ||
720 | c2 = 0; | ||
721 | sqr_add_c(a, 1, c3, c1, c2); | ||
722 | sqr_add_c2(a, 2, 0, c3, c1, c2); | ||
723 | r[2] = c3; | ||
724 | c3 = 0; | ||
725 | sqr_add_c2(a, 3, 0, c1, c2, c3); | ||
726 | sqr_add_c2(a, 2, 1, c1, c2, c3); | ||
727 | r[3] = c1; | ||
728 | c1 = 0; | ||
729 | sqr_add_c(a, 2, c2, c3, c1); | ||
730 | sqr_add_c2(a, 3, 1, c2, c3, c1); | ||
731 | r[4] = c2; | ||
732 | c2 = 0; | ||
733 | sqr_add_c2(a, 3, 2, c3, c1, c2); | ||
734 | r[5] = c3; | ||
735 | c3 = 0; | ||
736 | sqr_add_c(a, 3, c1, c2, c3); | ||
737 | r[6] = c1; | ||
738 | r[7] = c2; | ||
739 | } | ||
740 | |||
741 | #ifdef OPENSSL_NO_ASM | 482 | #ifdef OPENSSL_NO_ASM |
742 | #ifdef OPENSSL_BN_ASM_MONT | 483 | #ifdef OPENSSL_BN_ASM_MONT |
743 | /* | 484 | /* |
@@ -853,45 +594,6 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U | |||
853 | 594 | ||
854 | #else /* !BN_MUL_COMBA */ | 595 | #else /* !BN_MUL_COMBA */ |
855 | 596 | ||
856 | /* hmm... is it faster just to do a multiply? */ | ||
857 | #undef bn_sqr_comba4 | ||
858 | void | ||
859 | bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | ||
860 | { | ||
861 | BN_ULONG t[8]; | ||
862 | bn_sqr_normal(r, a, 4, t); | ||
863 | } | ||
864 | |||
865 | #undef bn_sqr_comba8 | ||
866 | void | ||
867 | bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | ||
868 | { | ||
869 | BN_ULONG t[16]; | ||
870 | bn_sqr_normal(r, a, 8, t); | ||
871 | } | ||
872 | |||
873 | void | ||
874 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
875 | { | ||
876 | r[4] = bn_mul_words(&(r[0]), a, 4, b[0]); | ||
877 | r[5] = bn_mul_add_words(&(r[1]), a, 4, b[1]); | ||
878 | r[6] = bn_mul_add_words(&(r[2]), a, 4, b[2]); | ||
879 | r[7] = bn_mul_add_words(&(r[3]), a, 4, b[3]); | ||
880 | } | ||
881 | |||
882 | void | ||
883 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
884 | { | ||
885 | r[8] = bn_mul_words(&(r[0]), a, 8, b[0]); | ||
886 | r[9] = bn_mul_add_words(&(r[1]), a, 8, b[1]); | ||
887 | r[10] = bn_mul_add_words(&(r[2]), a, 8, b[2]); | ||
888 | r[11] = bn_mul_add_words(&(r[3]), a, 8, b[3]); | ||
889 | r[12] = bn_mul_add_words(&(r[4]), a, 8, b[4]); | ||
890 | r[13] = bn_mul_add_words(&(r[5]), a, 8, b[5]); | ||
891 | r[14] = bn_mul_add_words(&(r[6]), a, 8, b[6]); | ||
892 | r[15] = bn_mul_add_words(&(r[7]), a, 8, b[7]); | ||
893 | } | ||
894 | |||
895 | #ifdef OPENSSL_NO_ASM | 597 | #ifdef OPENSSL_NO_ASM |
896 | #ifdef OPENSSL_BN_ASM_MONT | 598 | #ifdef OPENSSL_BN_ASM_MONT |
897 | int | 599 | int |
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c index b7a7f8bcef..3a69ef35da 100644 --- a/src/lib/libcrypto/bn/bn_mul.c +++ b/src/lib/libcrypto/bn/bn_mul.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_mul.c,v 1.27 2023/01/20 12:16:46 jsing Exp $ */ | 1 | /* $OpenBSD: bn_mul.c,v 1.28 2023/01/20 17:31:52 jsing Exp $ */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. | 3 | * All rights reserved. |
4 | * | 4 | * |
@@ -62,8 +62,157 @@ | |||
62 | 62 | ||
63 | #include <openssl/opensslconf.h> | 63 | #include <openssl/opensslconf.h> |
64 | 64 | ||
65 | #include "bn_arch.h" | ||
65 | #include "bn_local.h" | 66 | #include "bn_local.h" |
66 | 67 | ||
68 | #ifndef HAVE_BN_MUL_COMBA4 | ||
69 | void | ||
70 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
71 | { | ||
72 | BN_ULONG c1, c2, c3; | ||
73 | |||
74 | c1 = 0; | ||
75 | c2 = 0; | ||
76 | c3 = 0; | ||
77 | mul_add_c(a[0], b[0], c1, c2, c3); | ||
78 | r[0] = c1; | ||
79 | c1 = 0; | ||
80 | mul_add_c(a[0], b[1], c2, c3, c1); | ||
81 | mul_add_c(a[1], b[0], c2, c3, c1); | ||
82 | r[1] = c2; | ||
83 | c2 = 0; | ||
84 | mul_add_c(a[2], b[0], c3, c1, c2); | ||
85 | mul_add_c(a[1], b[1], c3, c1, c2); | ||
86 | mul_add_c(a[0], b[2], c3, c1, c2); | ||
87 | r[2] = c3; | ||
88 | c3 = 0; | ||
89 | mul_add_c(a[0], b[3], c1, c2, c3); | ||
90 | mul_add_c(a[1], b[2], c1, c2, c3); | ||
91 | mul_add_c(a[2], b[1], c1, c2, c3); | ||
92 | mul_add_c(a[3], b[0], c1, c2, c3); | ||
93 | r[3] = c1; | ||
94 | c1 = 0; | ||
95 | mul_add_c(a[3], b[1], c2, c3, c1); | ||
96 | mul_add_c(a[2], b[2], c2, c3, c1); | ||
97 | mul_add_c(a[1], b[3], c2, c3, c1); | ||
98 | r[4] = c2; | ||
99 | c2 = 0; | ||
100 | mul_add_c(a[2], b[3], c3, c1, c2); | ||
101 | mul_add_c(a[3], b[2], c3, c1, c2); | ||
102 | r[5] = c3; | ||
103 | c3 = 0; | ||
104 | mul_add_c(a[3], b[3], c1, c2, c3); | ||
105 | r[6] = c1; | ||
106 | r[7] = c2; | ||
107 | } | ||
108 | #endif | ||
109 | |||
110 | #ifndef HAVE_BN_MUL_COMBA8 | ||
111 | void | ||
112 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
113 | { | ||
114 | BN_ULONG c1, c2, c3; | ||
115 | |||
116 | c1 = 0; | ||
117 | c2 = 0; | ||
118 | c3 = 0; | ||
119 | mul_add_c(a[0], b[0], c1, c2, c3); | ||
120 | r[0] = c1; | ||
121 | c1 = 0; | ||
122 | mul_add_c(a[0], b[1], c2, c3, c1); | ||
123 | mul_add_c(a[1], b[0], c2, c3, c1); | ||
124 | r[1] = c2; | ||
125 | c2 = 0; | ||
126 | mul_add_c(a[2], b[0], c3, c1, c2); | ||
127 | mul_add_c(a[1], b[1], c3, c1, c2); | ||
128 | mul_add_c(a[0], b[2], c3, c1, c2); | ||
129 | r[2] = c3; | ||
130 | c3 = 0; | ||
131 | mul_add_c(a[0], b[3], c1, c2, c3); | ||
132 | mul_add_c(a[1], b[2], c1, c2, c3); | ||
133 | mul_add_c(a[2], b[1], c1, c2, c3); | ||
134 | mul_add_c(a[3], b[0], c1, c2, c3); | ||
135 | r[3] = c1; | ||
136 | c1 = 0; | ||
137 | mul_add_c(a[4], b[0], c2, c3, c1); | ||
138 | mul_add_c(a[3], b[1], c2, c3, c1); | ||
139 | mul_add_c(a[2], b[2], c2, c3, c1); | ||
140 | mul_add_c(a[1], b[3], c2, c3, c1); | ||
141 | mul_add_c(a[0], b[4], c2, c3, c1); | ||
142 | r[4] = c2; | ||
143 | c2 = 0; | ||
144 | mul_add_c(a[0], b[5], c3, c1, c2); | ||
145 | mul_add_c(a[1], b[4], c3, c1, c2); | ||
146 | mul_add_c(a[2], b[3], c3, c1, c2); | ||
147 | mul_add_c(a[3], b[2], c3, c1, c2); | ||
148 | mul_add_c(a[4], b[1], c3, c1, c2); | ||
149 | mul_add_c(a[5], b[0], c3, c1, c2); | ||
150 | r[5] = c3; | ||
151 | c3 = 0; | ||
152 | mul_add_c(a[6], b[0], c1, c2, c3); | ||
153 | mul_add_c(a[5], b[1], c1, c2, c3); | ||
154 | mul_add_c(a[4], b[2], c1, c2, c3); | ||
155 | mul_add_c(a[3], b[3], c1, c2, c3); | ||
156 | mul_add_c(a[2], b[4], c1, c2, c3); | ||
157 | mul_add_c(a[1], b[5], c1, c2, c3); | ||
158 | mul_add_c(a[0], b[6], c1, c2, c3); | ||
159 | r[6] = c1; | ||
160 | c1 = 0; | ||
161 | mul_add_c(a[0], b[7], c2, c3, c1); | ||
162 | mul_add_c(a[1], b[6], c2, c3, c1); | ||
163 | mul_add_c(a[2], b[5], c2, c3, c1); | ||
164 | mul_add_c(a[3], b[4], c2, c3, c1); | ||
165 | mul_add_c(a[4], b[3], c2, c3, c1); | ||
166 | mul_add_c(a[5], b[2], c2, c3, c1); | ||
167 | mul_add_c(a[6], b[1], c2, c3, c1); | ||
168 | mul_add_c(a[7], b[0], c2, c3, c1); | ||
169 | r[7] = c2; | ||
170 | c2 = 0; | ||
171 | mul_add_c(a[7], b[1], c3, c1, c2); | ||
172 | mul_add_c(a[6], b[2], c3, c1, c2); | ||
173 | mul_add_c(a[5], b[3], c3, c1, c2); | ||
174 | mul_add_c(a[4], b[4], c3, c1, c2); | ||
175 | mul_add_c(a[3], b[5], c3, c1, c2); | ||
176 | mul_add_c(a[2], b[6], c3, c1, c2); | ||
177 | mul_add_c(a[1], b[7], c3, c1, c2); | ||
178 | r[8] = c3; | ||
179 | c3 = 0; | ||
180 | mul_add_c(a[2], b[7], c1, c2, c3); | ||
181 | mul_add_c(a[3], b[6], c1, c2, c3); | ||
182 | mul_add_c(a[4], b[5], c1, c2, c3); | ||
183 | mul_add_c(a[5], b[4], c1, c2, c3); | ||
184 | mul_add_c(a[6], b[3], c1, c2, c3); | ||
185 | mul_add_c(a[7], b[2], c1, c2, c3); | ||
186 | r[9] = c1; | ||
187 | c1 = 0; | ||
188 | mul_add_c(a[7], b[3], c2, c3, c1); | ||
189 | mul_add_c(a[6], b[4], c2, c3, c1); | ||
190 | mul_add_c(a[5], b[5], c2, c3, c1); | ||
191 | mul_add_c(a[4], b[6], c2, c3, c1); | ||
192 | mul_add_c(a[3], b[7], c2, c3, c1); | ||
193 | r[10] = c2; | ||
194 | c2 = 0; | ||
195 | mul_add_c(a[4], b[7], c3, c1, c2); | ||
196 | mul_add_c(a[5], b[6], c3, c1, c2); | ||
197 | mul_add_c(a[6], b[5], c3, c1, c2); | ||
198 | mul_add_c(a[7], b[4], c3, c1, c2); | ||
199 | r[11] = c3; | ||
200 | c3 = 0; | ||
201 | mul_add_c(a[7], b[5], c1, c2, c3); | ||
202 | mul_add_c(a[6], b[6], c1, c2, c3); | ||
203 | mul_add_c(a[5], b[7], c1, c2, c3); | ||
204 | r[12] = c1; | ||
205 | c1 = 0; | ||
206 | mul_add_c(a[6], b[7], c2, c3, c1); | ||
207 | mul_add_c(a[7], b[6], c2, c3, c1); | ||
208 | r[13] = c2; | ||
209 | c2 = 0; | ||
210 | mul_add_c(a[7], b[7], c3, c1, c2); | ||
211 | r[14] = c3; | ||
212 | r[15] = c1; | ||
213 | } | ||
214 | #endif | ||
215 | |||
67 | #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS) | 216 | #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS) |
68 | /* | 217 | /* |
69 | * Here follows a specialised variant of bn_sub_words(), which has the property | 218 | * Here follows a specialised variant of bn_sub_words(), which has the property |
diff --git a/src/lib/libcrypto/bn/bn_sqr.c b/src/lib/libcrypto/bn/bn_sqr.c index 56ea378527..02b87556d4 100644 --- a/src/lib/libcrypto/bn/bn_sqr.c +++ b/src/lib/libcrypto/bn/bn_sqr.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_sqr.c,v 1.18 2023/01/16 17:56:25 jsing Exp $ */ | 1 | /* $OpenBSD: bn_sqr.c,v 1.19 2023/01/20 17:31:52 jsing Exp $ */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. | 3 | * All rights reserved. |
4 | * | 4 | * |
@@ -59,8 +59,123 @@ | |||
59 | #include <stdio.h> | 59 | #include <stdio.h> |
60 | #include <string.h> | 60 | #include <string.h> |
61 | 61 | ||
62 | #include "bn_arch.h" | ||
62 | #include "bn_local.h" | 63 | #include "bn_local.h" |
63 | 64 | ||
65 | #ifndef HAVE_BN_SQR_COMBA4 | ||
66 | void | ||
67 | bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | ||
68 | { | ||
69 | BN_ULONG c1, c2, c3; | ||
70 | |||
71 | c1 = 0; | ||
72 | c2 = 0; | ||
73 | c3 = 0; | ||
74 | sqr_add_c(a, 0, c1, c2, c3); | ||
75 | r[0] = c1; | ||
76 | c1 = 0; | ||
77 | sqr_add_c2(a, 1, 0, c2, c3, c1); | ||
78 | r[1] = c2; | ||
79 | c2 = 0; | ||
80 | sqr_add_c(a, 1, c3, c1, c2); | ||
81 | sqr_add_c2(a, 2, 0, c3, c1, c2); | ||
82 | r[2] = c3; | ||
83 | c3 = 0; | ||
84 | sqr_add_c2(a, 3, 0, c1, c2, c3); | ||
85 | sqr_add_c2(a, 2, 1, c1, c2, c3); | ||
86 | r[3] = c1; | ||
87 | c1 = 0; | ||
88 | sqr_add_c(a, 2, c2, c3, c1); | ||
89 | sqr_add_c2(a, 3, 1, c2, c3, c1); | ||
90 | r[4] = c2; | ||
91 | c2 = 0; | ||
92 | sqr_add_c2(a, 3, 2, c3, c1, c2); | ||
93 | r[5] = c3; | ||
94 | c3 = 0; | ||
95 | sqr_add_c(a, 3, c1, c2, c3); | ||
96 | r[6] = c1; | ||
97 | r[7] = c2; | ||
98 | } | ||
99 | #endif | ||
100 | |||
101 | #ifndef HAVE_BN_SQR_COMBA8 | ||
102 | void | ||
103 | bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | ||
104 | { | ||
105 | BN_ULONG c1, c2, c3; | ||
106 | |||
107 | c1 = 0; | ||
108 | c2 = 0; | ||
109 | c3 = 0; | ||
110 | sqr_add_c(a, 0, c1, c2, c3); | ||
111 | r[0] = c1; | ||
112 | c1 = 0; | ||
113 | sqr_add_c2(a, 1, 0, c2, c3, c1); | ||
114 | r[1] = c2; | ||
115 | c2 = 0; | ||
116 | sqr_add_c(a, 1, c3, c1, c2); | ||
117 | sqr_add_c2(a, 2, 0, c3, c1, c2); | ||
118 | r[2] = c3; | ||
119 | c3 = 0; | ||
120 | sqr_add_c2(a, 3, 0, c1, c2, c3); | ||
121 | sqr_add_c2(a, 2, 1, c1, c2, c3); | ||
122 | r[3] = c1; | ||
123 | c1 = 0; | ||
124 | sqr_add_c(a, 2, c2, c3, c1); | ||
125 | sqr_add_c2(a, 3, 1, c2, c3, c1); | ||
126 | sqr_add_c2(a, 4, 0, c2, c3, c1); | ||
127 | r[4] = c2; | ||
128 | c2 = 0; | ||
129 | sqr_add_c2(a, 5, 0, c3, c1, c2); | ||
130 | sqr_add_c2(a, 4, 1, c3, c1, c2); | ||
131 | sqr_add_c2(a, 3, 2, c3, c1, c2); | ||
132 | r[5] = c3; | ||
133 | c3 = 0; | ||
134 | sqr_add_c(a, 3, c1, c2, c3); | ||
135 | sqr_add_c2(a, 4, 2, c1, c2, c3); | ||
136 | sqr_add_c2(a, 5, 1, c1, c2, c3); | ||
137 | sqr_add_c2(a, 6, 0, c1, c2, c3); | ||
138 | r[6] = c1; | ||
139 | c1 = 0; | ||
140 | sqr_add_c2(a, 7, 0, c2, c3, c1); | ||
141 | sqr_add_c2(a, 6, 1, c2, c3, c1); | ||
142 | sqr_add_c2(a, 5, 2, c2, c3, c1); | ||
143 | sqr_add_c2(a, 4, 3, c2, c3, c1); | ||
144 | r[7] = c2; | ||
145 | c2 = 0; | ||
146 | sqr_add_c(a, 4, c3, c1, c2); | ||
147 | sqr_add_c2(a, 5, 3, c3, c1, c2); | ||
148 | sqr_add_c2(a, 6, 2, c3, c1, c2); | ||
149 | sqr_add_c2(a, 7, 1, c3, c1, c2); | ||
150 | r[8] = c3; | ||
151 | c3 = 0; | ||
152 | sqr_add_c2(a, 7, 2, c1, c2, c3); | ||
153 | sqr_add_c2(a, 6, 3, c1, c2, c3); | ||
154 | sqr_add_c2(a, 5, 4, c1, c2, c3); | ||
155 | r[9] = c1; | ||
156 | c1 = 0; | ||
157 | sqr_add_c(a, 5, c2, c3, c1); | ||
158 | sqr_add_c2(a, 6, 4, c2, c3, c1); | ||
159 | sqr_add_c2(a, 7, 3, c2, c3, c1); | ||
160 | r[10] = c2; | ||
161 | c2 = 0; | ||
162 | sqr_add_c2(a, 7, 4, c3, c1, c2); | ||
163 | sqr_add_c2(a, 6, 5, c3, c1, c2); | ||
164 | r[11] = c3; | ||
165 | c3 = 0; | ||
166 | sqr_add_c(a, 6, c1, c2, c3); | ||
167 | sqr_add_c2(a, 7, 5, c1, c2, c3); | ||
168 | r[12] = c1; | ||
169 | c1 = 0; | ||
170 | sqr_add_c2(a, 7, 6, c2, c3, c1); | ||
171 | r[13] = c2; | ||
172 | c2 = 0; | ||
173 | sqr_add_c(a, 7, c3, c1, c2); | ||
174 | r[14] = c3; | ||
175 | r[15] = c1; | ||
176 | } | ||
177 | #endif | ||
178 | |||
64 | /* tmp must have 2*n words */ | 179 | /* tmp must have 2*n words */ |
65 | void | 180 | void |
66 | bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp) | 181 | bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp) |