diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/lib/libcrypto/bn/arch/amd64/bn_arch.h | 8 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/i386/bn_arch.h | 8 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/mips64/bn_arch.h | 8 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/powerpc/bn_arch.h | 8 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/arch/sparc/bn_arch.h | 8 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/bn_asm.c | 300 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/bn_mul.c | 151 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/bn_sqr.c | 117 |
8 files changed, 302 insertions, 306 deletions
diff --git a/src/lib/libcrypto/bn/arch/amd64/bn_arch.h b/src/lib/libcrypto/bn/arch/amd64/bn_arch.h index 136adf0e97..17d22f3cec 100644 --- a/src/lib/libcrypto/bn/arch/amd64/bn_arch.h +++ b/src/lib/libcrypto/bn/arch/amd64/bn_arch.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_arch.h,v 1.1 2023/01/20 10:04:33 jsing Exp $ */ | 1 | /* $OpenBSD: bn_arch.h,v 1.2 2023/01/20 17:31:52 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -20,5 +20,11 @@ | |||
| 20 | 20 | ||
| 21 | #ifndef OPENSSL_NO_ASM | 21 | #ifndef OPENSSL_NO_ASM |
| 22 | 22 | ||
| 23 | #define HAVE_BN_MUL_COMBA4 | ||
| 24 | #define HAVE_BN_MUL_COMBA8 | ||
| 25 | |||
| 26 | #define HAVE_BN_SQR_COMBA4 | ||
| 27 | #define HAVE_BN_SQR_COMBA8 | ||
| 28 | |||
| 23 | #endif | 29 | #endif |
| 24 | #endif | 30 | #endif |
diff --git a/src/lib/libcrypto/bn/arch/i386/bn_arch.h b/src/lib/libcrypto/bn/arch/i386/bn_arch.h index 136adf0e97..17d22f3cec 100644 --- a/src/lib/libcrypto/bn/arch/i386/bn_arch.h +++ b/src/lib/libcrypto/bn/arch/i386/bn_arch.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_arch.h,v 1.1 2023/01/20 10:04:33 jsing Exp $ */ | 1 | /* $OpenBSD: bn_arch.h,v 1.2 2023/01/20 17:31:52 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -20,5 +20,11 @@ | |||
| 20 | 20 | ||
| 21 | #ifndef OPENSSL_NO_ASM | 21 | #ifndef OPENSSL_NO_ASM |
| 22 | 22 | ||
| 23 | #define HAVE_BN_MUL_COMBA4 | ||
| 24 | #define HAVE_BN_MUL_COMBA8 | ||
| 25 | |||
| 26 | #define HAVE_BN_SQR_COMBA4 | ||
| 27 | #define HAVE_BN_SQR_COMBA8 | ||
| 28 | |||
| 23 | #endif | 29 | #endif |
| 24 | #endif | 30 | #endif |
diff --git a/src/lib/libcrypto/bn/arch/mips64/bn_arch.h b/src/lib/libcrypto/bn/arch/mips64/bn_arch.h index 6c6212c4a6..8e8fd1110f 100644 --- a/src/lib/libcrypto/bn/arch/mips64/bn_arch.h +++ b/src/lib/libcrypto/bn/arch/mips64/bn_arch.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_arch.h,v 1.2 2023/01/20 10:07:52 jsing Exp $ */ | 1 | /* $OpenBSD: bn_arch.h,v 1.3 2023/01/20 17:31:52 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -22,5 +22,11 @@ | |||
| 22 | 22 | ||
| 23 | #define HAVE_BN_DIV_3_WORDS | 23 | #define HAVE_BN_DIV_3_WORDS |
| 24 | 24 | ||
| 25 | #define HAVE_BN_MUL_COMBA4 | ||
| 26 | #define HAVE_BN_MUL_COMBA8 | ||
| 27 | |||
| 28 | #define HAVE_BN_SQR_COMBA4 | ||
| 29 | #define HAVE_BN_SQR_COMBA8 | ||
| 30 | |||
| 25 | #endif | 31 | #endif |
| 26 | #endif | 32 | #endif |
diff --git a/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h b/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h index 4d6571f9cb..17d22f3cec 100644 --- a/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h +++ b/src/lib/libcrypto/bn/arch/powerpc/bn_arch.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_arch.h,v 1.1 2023/01/20 10:04:34 jsing Exp $ */ | 1 | /* $OpenBSD: bn_arch.h,v 1.2 2023/01/20 17:31:52 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -20,5 +20,11 @@ | |||
| 20 | 20 | ||
| 21 | #ifndef OPENSSL_NO_ASM | 21 | #ifndef OPENSSL_NO_ASM |
| 22 | 22 | ||
| 23 | #define HAVE_BN_MUL_COMBA4 | ||
| 24 | #define HAVE_BN_MUL_COMBA8 | ||
| 25 | |||
| 26 | #define HAVE_BN_SQR_COMBA4 | ||
| 27 | #define HAVE_BN_SQR_COMBA8 | ||
| 28 | |||
| 23 | #endif | 29 | #endif |
| 24 | #endif | 30 | #endif |
diff --git a/src/lib/libcrypto/bn/arch/sparc/bn_arch.h b/src/lib/libcrypto/bn/arch/sparc/bn_arch.h index 4d6571f9cb..17d22f3cec 100644 --- a/src/lib/libcrypto/bn/arch/sparc/bn_arch.h +++ b/src/lib/libcrypto/bn/arch/sparc/bn_arch.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_arch.h,v 1.1 2023/01/20 10:04:34 jsing Exp $ */ | 1 | /* $OpenBSD: bn_arch.h,v 1.2 2023/01/20 17:31:52 jsing Exp $ */ |
| 2 | /* | 2 | /* |
| 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> |
| 4 | * | 4 | * |
| @@ -20,5 +20,11 @@ | |||
| 20 | 20 | ||
| 21 | #ifndef OPENSSL_NO_ASM | 21 | #ifndef OPENSSL_NO_ASM |
| 22 | 22 | ||
| 23 | #define HAVE_BN_MUL_COMBA4 | ||
| 24 | #define HAVE_BN_MUL_COMBA8 | ||
| 25 | |||
| 26 | #define HAVE_BN_SQR_COMBA4 | ||
| 27 | #define HAVE_BN_SQR_COMBA8 | ||
| 28 | |||
| 23 | #endif | 29 | #endif |
| 24 | #endif | 30 | #endif |
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c index 84063486b3..df4ddaea17 100644 --- a/src/lib/libcrypto/bn/bn_asm.c +++ b/src/lib/libcrypto/bn/bn_asm.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_asm.c,v 1.18 2023/01/20 17:26:03 jsing Exp $ */ | 1 | /* $OpenBSD: bn_asm.c,v 1.19 2023/01/20 17:31:52 jsing Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -479,265 +479,6 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) | |||
| 479 | 479 | ||
| 480 | #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT) | 480 | #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT) |
| 481 | 481 | ||
| 482 | #undef bn_mul_comba8 | ||
| 483 | #undef bn_mul_comba4 | ||
| 484 | #undef bn_sqr_comba8 | ||
| 485 | #undef bn_sqr_comba4 | ||
| 486 | |||
| 487 | void | ||
| 488 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
| 489 | { | ||
| 490 | BN_ULONG c1, c2, c3; | ||
| 491 | |||
| 492 | c1 = 0; | ||
| 493 | c2 = 0; | ||
| 494 | c3 = 0; | ||
| 495 | mul_add_c(a[0], b[0], c1, c2, c3); | ||
| 496 | r[0] = c1; | ||
| 497 | c1 = 0; | ||
| 498 | mul_add_c(a[0], b[1], c2, c3, c1); | ||
| 499 | mul_add_c(a[1], b[0], c2, c3, c1); | ||
| 500 | r[1] = c2; | ||
| 501 | c2 = 0; | ||
| 502 | mul_add_c(a[2], b[0], c3, c1, c2); | ||
| 503 | mul_add_c(a[1], b[1], c3, c1, c2); | ||
| 504 | mul_add_c(a[0], b[2], c3, c1, c2); | ||
| 505 | r[2] = c3; | ||
| 506 | c3 = 0; | ||
| 507 | mul_add_c(a[0], b[3], c1, c2, c3); | ||
| 508 | mul_add_c(a[1], b[2], c1, c2, c3); | ||
| 509 | mul_add_c(a[2], b[1], c1, c2, c3); | ||
| 510 | mul_add_c(a[3], b[0], c1, c2, c3); | ||
| 511 | r[3] = c1; | ||
| 512 | c1 = 0; | ||
| 513 | mul_add_c(a[4], b[0], c2, c3, c1); | ||
| 514 | mul_add_c(a[3], b[1], c2, c3, c1); | ||
| 515 | mul_add_c(a[2], b[2], c2, c3, c1); | ||
| 516 | mul_add_c(a[1], b[3], c2, c3, c1); | ||
| 517 | mul_add_c(a[0], b[4], c2, c3, c1); | ||
| 518 | r[4] = c2; | ||
| 519 | c2 = 0; | ||
| 520 | mul_add_c(a[0], b[5], c3, c1, c2); | ||
| 521 | mul_add_c(a[1], b[4], c3, c1, c2); | ||
| 522 | mul_add_c(a[2], b[3], c3, c1, c2); | ||
| 523 | mul_add_c(a[3], b[2], c3, c1, c2); | ||
| 524 | mul_add_c(a[4], b[1], c3, c1, c2); | ||
| 525 | mul_add_c(a[5], b[0], c3, c1, c2); | ||
| 526 | r[5] = c3; | ||
| 527 | c3 = 0; | ||
| 528 | mul_add_c(a[6], b[0], c1, c2, c3); | ||
| 529 | mul_add_c(a[5], b[1], c1, c2, c3); | ||
| 530 | mul_add_c(a[4], b[2], c1, c2, c3); | ||
| 531 | mul_add_c(a[3], b[3], c1, c2, c3); | ||
| 532 | mul_add_c(a[2], b[4], c1, c2, c3); | ||
| 533 | mul_add_c(a[1], b[5], c1, c2, c3); | ||
| 534 | mul_add_c(a[0], b[6], c1, c2, c3); | ||
| 535 | r[6] = c1; | ||
| 536 | c1 = 0; | ||
| 537 | mul_add_c(a[0], b[7], c2, c3, c1); | ||
| 538 | mul_add_c(a[1], b[6], c2, c3, c1); | ||
| 539 | mul_add_c(a[2], b[5], c2, c3, c1); | ||
| 540 | mul_add_c(a[3], b[4], c2, c3, c1); | ||
| 541 | mul_add_c(a[4], b[3], c2, c3, c1); | ||
| 542 | mul_add_c(a[5], b[2], c2, c3, c1); | ||
| 543 | mul_add_c(a[6], b[1], c2, c3, c1); | ||
| 544 | mul_add_c(a[7], b[0], c2, c3, c1); | ||
| 545 | r[7] = c2; | ||
| 546 | c2 = 0; | ||
| 547 | mul_add_c(a[7], b[1], c3, c1, c2); | ||
| 548 | mul_add_c(a[6], b[2], c3, c1, c2); | ||
| 549 | mul_add_c(a[5], b[3], c3, c1, c2); | ||
| 550 | mul_add_c(a[4], b[4], c3, c1, c2); | ||
| 551 | mul_add_c(a[3], b[5], c3, c1, c2); | ||
| 552 | mul_add_c(a[2], b[6], c3, c1, c2); | ||
| 553 | mul_add_c(a[1], b[7], c3, c1, c2); | ||
| 554 | r[8] = c3; | ||
| 555 | c3 = 0; | ||
| 556 | mul_add_c(a[2], b[7], c1, c2, c3); | ||
| 557 | mul_add_c(a[3], b[6], c1, c2, c3); | ||
| 558 | mul_add_c(a[4], b[5], c1, c2, c3); | ||
| 559 | mul_add_c(a[5], b[4], c1, c2, c3); | ||
| 560 | mul_add_c(a[6], b[3], c1, c2, c3); | ||
| 561 | mul_add_c(a[7], b[2], c1, c2, c3); | ||
| 562 | r[9] = c1; | ||
| 563 | c1 = 0; | ||
| 564 | mul_add_c(a[7], b[3], c2, c3, c1); | ||
| 565 | mul_add_c(a[6], b[4], c2, c3, c1); | ||
| 566 | mul_add_c(a[5], b[5], c2, c3, c1); | ||
| 567 | mul_add_c(a[4], b[6], c2, c3, c1); | ||
| 568 | mul_add_c(a[3], b[7], c2, c3, c1); | ||
| 569 | r[10] = c2; | ||
| 570 | c2 = 0; | ||
| 571 | mul_add_c(a[4], b[7], c3, c1, c2); | ||
| 572 | mul_add_c(a[5], b[6], c3, c1, c2); | ||
| 573 | mul_add_c(a[6], b[5], c3, c1, c2); | ||
| 574 | mul_add_c(a[7], b[4], c3, c1, c2); | ||
| 575 | r[11] = c3; | ||
| 576 | c3 = 0; | ||
| 577 | mul_add_c(a[7], b[5], c1, c2, c3); | ||
| 578 | mul_add_c(a[6], b[6], c1, c2, c3); | ||
| 579 | mul_add_c(a[5], b[7], c1, c2, c3); | ||
| 580 | r[12] = c1; | ||
| 581 | c1 = 0; | ||
| 582 | mul_add_c(a[6], b[7], c2, c3, c1); | ||
| 583 | mul_add_c(a[7], b[6], c2, c3, c1); | ||
| 584 | r[13] = c2; | ||
| 585 | c2 = 0; | ||
| 586 | mul_add_c(a[7], b[7], c3, c1, c2); | ||
| 587 | r[14] = c3; | ||
| 588 | r[15] = c1; | ||
| 589 | } | ||
| 590 | |||
| 591 | void | ||
| 592 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
| 593 | { | ||
| 594 | BN_ULONG c1, c2, c3; | ||
| 595 | |||
| 596 | c1 = 0; | ||
| 597 | c2 = 0; | ||
| 598 | c3 = 0; | ||
| 599 | mul_add_c(a[0], b[0], c1, c2, c3); | ||
| 600 | r[0] = c1; | ||
| 601 | c1 = 0; | ||
| 602 | mul_add_c(a[0], b[1], c2, c3, c1); | ||
| 603 | mul_add_c(a[1], b[0], c2, c3, c1); | ||
| 604 | r[1] = c2; | ||
| 605 | c2 = 0; | ||
| 606 | mul_add_c(a[2], b[0], c3, c1, c2); | ||
| 607 | mul_add_c(a[1], b[1], c3, c1, c2); | ||
| 608 | mul_add_c(a[0], b[2], c3, c1, c2); | ||
| 609 | r[2] = c3; | ||
| 610 | c3 = 0; | ||
| 611 | mul_add_c(a[0], b[3], c1, c2, c3); | ||
| 612 | mul_add_c(a[1], b[2], c1, c2, c3); | ||
| 613 | mul_add_c(a[2], b[1], c1, c2, c3); | ||
| 614 | mul_add_c(a[3], b[0], c1, c2, c3); | ||
| 615 | r[3] = c1; | ||
| 616 | c1 = 0; | ||
| 617 | mul_add_c(a[3], b[1], c2, c3, c1); | ||
| 618 | mul_add_c(a[2], b[2], c2, c3, c1); | ||
| 619 | mul_add_c(a[1], b[3], c2, c3, c1); | ||
| 620 | r[4] = c2; | ||
| 621 | c2 = 0; | ||
| 622 | mul_add_c(a[2], b[3], c3, c1, c2); | ||
| 623 | mul_add_c(a[3], b[2], c3, c1, c2); | ||
| 624 | r[5] = c3; | ||
| 625 | c3 = 0; | ||
| 626 | mul_add_c(a[3], b[3], c1, c2, c3); | ||
| 627 | r[6] = c1; | ||
| 628 | r[7] = c2; | ||
| 629 | } | ||
| 630 | |||
| 631 | void | ||
| 632 | bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | ||
| 633 | { | ||
| 634 | BN_ULONG c1, c2, c3; | ||
| 635 | |||
| 636 | c1 = 0; | ||
| 637 | c2 = 0; | ||
| 638 | c3 = 0; | ||
| 639 | sqr_add_c(a, 0, c1, c2, c3); | ||
| 640 | r[0] = c1; | ||
| 641 | c1 = 0; | ||
| 642 | sqr_add_c2(a, 1, 0, c2, c3, c1); | ||
| 643 | r[1] = c2; | ||
| 644 | c2 = 0; | ||
| 645 | sqr_add_c(a, 1, c3, c1, c2); | ||
| 646 | sqr_add_c2(a, 2, 0, c3, c1, c2); | ||
| 647 | r[2] = c3; | ||
| 648 | c3 = 0; | ||
| 649 | sqr_add_c2(a, 3, 0, c1, c2, c3); | ||
| 650 | sqr_add_c2(a, 2, 1, c1, c2, c3); | ||
| 651 | r[3] = c1; | ||
| 652 | c1 = 0; | ||
| 653 | sqr_add_c(a, 2, c2, c3, c1); | ||
| 654 | sqr_add_c2(a, 3, 1, c2, c3, c1); | ||
| 655 | sqr_add_c2(a, 4, 0, c2, c3, c1); | ||
| 656 | r[4] = c2; | ||
| 657 | c2 = 0; | ||
| 658 | sqr_add_c2(a, 5, 0, c3, c1, c2); | ||
| 659 | sqr_add_c2(a, 4, 1, c3, c1, c2); | ||
| 660 | sqr_add_c2(a, 3, 2, c3, c1, c2); | ||
| 661 | r[5] = c3; | ||
| 662 | c3 = 0; | ||
| 663 | sqr_add_c(a, 3, c1, c2, c3); | ||
| 664 | sqr_add_c2(a, 4, 2, c1, c2, c3); | ||
| 665 | sqr_add_c2(a, 5, 1, c1, c2, c3); | ||
| 666 | sqr_add_c2(a, 6, 0, c1, c2, c3); | ||
| 667 | r[6] = c1; | ||
| 668 | c1 = 0; | ||
| 669 | sqr_add_c2(a, 7, 0, c2, c3, c1); | ||
| 670 | sqr_add_c2(a, 6, 1, c2, c3, c1); | ||
| 671 | sqr_add_c2(a, 5, 2, c2, c3, c1); | ||
| 672 | sqr_add_c2(a, 4, 3, c2, c3, c1); | ||
| 673 | r[7] = c2; | ||
| 674 | c2 = 0; | ||
| 675 | sqr_add_c(a, 4, c3, c1, c2); | ||
| 676 | sqr_add_c2(a, 5, 3, c3, c1, c2); | ||
| 677 | sqr_add_c2(a, 6, 2, c3, c1, c2); | ||
| 678 | sqr_add_c2(a, 7, 1, c3, c1, c2); | ||
| 679 | r[8] = c3; | ||
| 680 | c3 = 0; | ||
| 681 | sqr_add_c2(a, 7, 2, c1, c2, c3); | ||
| 682 | sqr_add_c2(a, 6, 3, c1, c2, c3); | ||
| 683 | sqr_add_c2(a, 5, 4, c1, c2, c3); | ||
| 684 | r[9] = c1; | ||
| 685 | c1 = 0; | ||
| 686 | sqr_add_c(a, 5, c2, c3, c1); | ||
| 687 | sqr_add_c2(a, 6, 4, c2, c3, c1); | ||
| 688 | sqr_add_c2(a, 7, 3, c2, c3, c1); | ||
| 689 | r[10] = c2; | ||
| 690 | c2 = 0; | ||
| 691 | sqr_add_c2(a, 7, 4, c3, c1, c2); | ||
| 692 | sqr_add_c2(a, 6, 5, c3, c1, c2); | ||
| 693 | r[11] = c3; | ||
| 694 | c3 = 0; | ||
| 695 | sqr_add_c(a, 6, c1, c2, c3); | ||
| 696 | sqr_add_c2(a, 7, 5, c1, c2, c3); | ||
| 697 | r[12] = c1; | ||
| 698 | c1 = 0; | ||
| 699 | sqr_add_c2(a, 7, 6, c2, c3, c1); | ||
| 700 | r[13] = c2; | ||
| 701 | c2 = 0; | ||
| 702 | sqr_add_c(a, 7, c3, c1, c2); | ||
| 703 | r[14] = c3; | ||
| 704 | r[15] = c1; | ||
| 705 | } | ||
| 706 | |||
| 707 | void | ||
| 708 | bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | ||
| 709 | { | ||
| 710 | BN_ULONG c1, c2, c3; | ||
| 711 | |||
| 712 | c1 = 0; | ||
| 713 | c2 = 0; | ||
| 714 | c3 = 0; | ||
| 715 | sqr_add_c(a, 0, c1, c2, c3); | ||
| 716 | r[0] = c1; | ||
| 717 | c1 = 0; | ||
| 718 | sqr_add_c2(a, 1, 0, c2, c3, c1); | ||
| 719 | r[1] = c2; | ||
| 720 | c2 = 0; | ||
| 721 | sqr_add_c(a, 1, c3, c1, c2); | ||
| 722 | sqr_add_c2(a, 2, 0, c3, c1, c2); | ||
| 723 | r[2] = c3; | ||
| 724 | c3 = 0; | ||
| 725 | sqr_add_c2(a, 3, 0, c1, c2, c3); | ||
| 726 | sqr_add_c2(a, 2, 1, c1, c2, c3); | ||
| 727 | r[3] = c1; | ||
| 728 | c1 = 0; | ||
| 729 | sqr_add_c(a, 2, c2, c3, c1); | ||
| 730 | sqr_add_c2(a, 3, 1, c2, c3, c1); | ||
| 731 | r[4] = c2; | ||
| 732 | c2 = 0; | ||
| 733 | sqr_add_c2(a, 3, 2, c3, c1, c2); | ||
| 734 | r[5] = c3; | ||
| 735 | c3 = 0; | ||
| 736 | sqr_add_c(a, 3, c1, c2, c3); | ||
| 737 | r[6] = c1; | ||
| 738 | r[7] = c2; | ||
| 739 | } | ||
| 740 | |||
| 741 | #ifdef OPENSSL_NO_ASM | 482 | #ifdef OPENSSL_NO_ASM |
| 742 | #ifdef OPENSSL_BN_ASM_MONT | 483 | #ifdef OPENSSL_BN_ASM_MONT |
| 743 | /* | 484 | /* |
| @@ -853,45 +594,6 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U | |||
| 853 | 594 | ||
| 854 | #else /* !BN_MUL_COMBA */ | 595 | #else /* !BN_MUL_COMBA */ |
| 855 | 596 | ||
| 856 | /* hmm... is it faster just to do a multiply? */ | ||
| 857 | #undef bn_sqr_comba4 | ||
| 858 | void | ||
| 859 | bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | ||
| 860 | { | ||
| 861 | BN_ULONG t[8]; | ||
| 862 | bn_sqr_normal(r, a, 4, t); | ||
| 863 | } | ||
| 864 | |||
| 865 | #undef bn_sqr_comba8 | ||
| 866 | void | ||
| 867 | bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | ||
| 868 | { | ||
| 869 | BN_ULONG t[16]; | ||
| 870 | bn_sqr_normal(r, a, 8, t); | ||
| 871 | } | ||
| 872 | |||
| 873 | void | ||
| 874 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
| 875 | { | ||
| 876 | r[4] = bn_mul_words(&(r[0]), a, 4, b[0]); | ||
| 877 | r[5] = bn_mul_add_words(&(r[1]), a, 4, b[1]); | ||
| 878 | r[6] = bn_mul_add_words(&(r[2]), a, 4, b[2]); | ||
| 879 | r[7] = bn_mul_add_words(&(r[3]), a, 4, b[3]); | ||
| 880 | } | ||
| 881 | |||
| 882 | void | ||
| 883 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
| 884 | { | ||
| 885 | r[8] = bn_mul_words(&(r[0]), a, 8, b[0]); | ||
| 886 | r[9] = bn_mul_add_words(&(r[1]), a, 8, b[1]); | ||
| 887 | r[10] = bn_mul_add_words(&(r[2]), a, 8, b[2]); | ||
| 888 | r[11] = bn_mul_add_words(&(r[3]), a, 8, b[3]); | ||
| 889 | r[12] = bn_mul_add_words(&(r[4]), a, 8, b[4]); | ||
| 890 | r[13] = bn_mul_add_words(&(r[5]), a, 8, b[5]); | ||
| 891 | r[14] = bn_mul_add_words(&(r[6]), a, 8, b[6]); | ||
| 892 | r[15] = bn_mul_add_words(&(r[7]), a, 8, b[7]); | ||
| 893 | } | ||
| 894 | |||
| 895 | #ifdef OPENSSL_NO_ASM | 597 | #ifdef OPENSSL_NO_ASM |
| 896 | #ifdef OPENSSL_BN_ASM_MONT | 598 | #ifdef OPENSSL_BN_ASM_MONT |
| 897 | int | 599 | int |
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c index b7a7f8bcef..3a69ef35da 100644 --- a/src/lib/libcrypto/bn/bn_mul.c +++ b/src/lib/libcrypto/bn/bn_mul.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_mul.c,v 1.27 2023/01/20 12:16:46 jsing Exp $ */ | 1 | /* $OpenBSD: bn_mul.c,v 1.28 2023/01/20 17:31:52 jsing Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -62,8 +62,157 @@ | |||
| 62 | 62 | ||
| 63 | #include <openssl/opensslconf.h> | 63 | #include <openssl/opensslconf.h> |
| 64 | 64 | ||
| 65 | #include "bn_arch.h" | ||
| 65 | #include "bn_local.h" | 66 | #include "bn_local.h" |
| 66 | 67 | ||
| 68 | #ifndef HAVE_BN_MUL_COMBA4 | ||
| 69 | void | ||
| 70 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
| 71 | { | ||
| 72 | BN_ULONG c1, c2, c3; | ||
| 73 | |||
| 74 | c1 = 0; | ||
| 75 | c2 = 0; | ||
| 76 | c3 = 0; | ||
| 77 | mul_add_c(a[0], b[0], c1, c2, c3); | ||
| 78 | r[0] = c1; | ||
| 79 | c1 = 0; | ||
| 80 | mul_add_c(a[0], b[1], c2, c3, c1); | ||
| 81 | mul_add_c(a[1], b[0], c2, c3, c1); | ||
| 82 | r[1] = c2; | ||
| 83 | c2 = 0; | ||
| 84 | mul_add_c(a[2], b[0], c3, c1, c2); | ||
| 85 | mul_add_c(a[1], b[1], c3, c1, c2); | ||
| 86 | mul_add_c(a[0], b[2], c3, c1, c2); | ||
| 87 | r[2] = c3; | ||
| 88 | c3 = 0; | ||
| 89 | mul_add_c(a[0], b[3], c1, c2, c3); | ||
| 90 | mul_add_c(a[1], b[2], c1, c2, c3); | ||
| 91 | mul_add_c(a[2], b[1], c1, c2, c3); | ||
| 92 | mul_add_c(a[3], b[0], c1, c2, c3); | ||
| 93 | r[3] = c1; | ||
| 94 | c1 = 0; | ||
| 95 | mul_add_c(a[3], b[1], c2, c3, c1); | ||
| 96 | mul_add_c(a[2], b[2], c2, c3, c1); | ||
| 97 | mul_add_c(a[1], b[3], c2, c3, c1); | ||
| 98 | r[4] = c2; | ||
| 99 | c2 = 0; | ||
| 100 | mul_add_c(a[2], b[3], c3, c1, c2); | ||
| 101 | mul_add_c(a[3], b[2], c3, c1, c2); | ||
| 102 | r[5] = c3; | ||
| 103 | c3 = 0; | ||
| 104 | mul_add_c(a[3], b[3], c1, c2, c3); | ||
| 105 | r[6] = c1; | ||
| 106 | r[7] = c2; | ||
| 107 | } | ||
| 108 | #endif | ||
| 109 | |||
| 110 | #ifndef HAVE_BN_MUL_COMBA8 | ||
| 111 | void | ||
| 112 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
| 113 | { | ||
| 114 | BN_ULONG c1, c2, c3; | ||
| 115 | |||
| 116 | c1 = 0; | ||
| 117 | c2 = 0; | ||
| 118 | c3 = 0; | ||
| 119 | mul_add_c(a[0], b[0], c1, c2, c3); | ||
| 120 | r[0] = c1; | ||
| 121 | c1 = 0; | ||
| 122 | mul_add_c(a[0], b[1], c2, c3, c1); | ||
| 123 | mul_add_c(a[1], b[0], c2, c3, c1); | ||
| 124 | r[1] = c2; | ||
| 125 | c2 = 0; | ||
| 126 | mul_add_c(a[2], b[0], c3, c1, c2); | ||
| 127 | mul_add_c(a[1], b[1], c3, c1, c2); | ||
| 128 | mul_add_c(a[0], b[2], c3, c1, c2); | ||
| 129 | r[2] = c3; | ||
| 130 | c3 = 0; | ||
| 131 | mul_add_c(a[0], b[3], c1, c2, c3); | ||
| 132 | mul_add_c(a[1], b[2], c1, c2, c3); | ||
| 133 | mul_add_c(a[2], b[1], c1, c2, c3); | ||
| 134 | mul_add_c(a[3], b[0], c1, c2, c3); | ||
| 135 | r[3] = c1; | ||
| 136 | c1 = 0; | ||
| 137 | mul_add_c(a[4], b[0], c2, c3, c1); | ||
| 138 | mul_add_c(a[3], b[1], c2, c3, c1); | ||
| 139 | mul_add_c(a[2], b[2], c2, c3, c1); | ||
| 140 | mul_add_c(a[1], b[3], c2, c3, c1); | ||
| 141 | mul_add_c(a[0], b[4], c2, c3, c1); | ||
| 142 | r[4] = c2; | ||
| 143 | c2 = 0; | ||
| 144 | mul_add_c(a[0], b[5], c3, c1, c2); | ||
| 145 | mul_add_c(a[1], b[4], c3, c1, c2); | ||
| 146 | mul_add_c(a[2], b[3], c3, c1, c2); | ||
| 147 | mul_add_c(a[3], b[2], c3, c1, c2); | ||
| 148 | mul_add_c(a[4], b[1], c3, c1, c2); | ||
| 149 | mul_add_c(a[5], b[0], c3, c1, c2); | ||
| 150 | r[5] = c3; | ||
| 151 | c3 = 0; | ||
| 152 | mul_add_c(a[6], b[0], c1, c2, c3); | ||
| 153 | mul_add_c(a[5], b[1], c1, c2, c3); | ||
| 154 | mul_add_c(a[4], b[2], c1, c2, c3); | ||
| 155 | mul_add_c(a[3], b[3], c1, c2, c3); | ||
| 156 | mul_add_c(a[2], b[4], c1, c2, c3); | ||
| 157 | mul_add_c(a[1], b[5], c1, c2, c3); | ||
| 158 | mul_add_c(a[0], b[6], c1, c2, c3); | ||
| 159 | r[6] = c1; | ||
| 160 | c1 = 0; | ||
| 161 | mul_add_c(a[0], b[7], c2, c3, c1); | ||
| 162 | mul_add_c(a[1], b[6], c2, c3, c1); | ||
| 163 | mul_add_c(a[2], b[5], c2, c3, c1); | ||
| 164 | mul_add_c(a[3], b[4], c2, c3, c1); | ||
| 165 | mul_add_c(a[4], b[3], c2, c3, c1); | ||
| 166 | mul_add_c(a[5], b[2], c2, c3, c1); | ||
| 167 | mul_add_c(a[6], b[1], c2, c3, c1); | ||
| 168 | mul_add_c(a[7], b[0], c2, c3, c1); | ||
| 169 | r[7] = c2; | ||
| 170 | c2 = 0; | ||
| 171 | mul_add_c(a[7], b[1], c3, c1, c2); | ||
| 172 | mul_add_c(a[6], b[2], c3, c1, c2); | ||
| 173 | mul_add_c(a[5], b[3], c3, c1, c2); | ||
| 174 | mul_add_c(a[4], b[4], c3, c1, c2); | ||
| 175 | mul_add_c(a[3], b[5], c3, c1, c2); | ||
| 176 | mul_add_c(a[2], b[6], c3, c1, c2); | ||
| 177 | mul_add_c(a[1], b[7], c3, c1, c2); | ||
| 178 | r[8] = c3; | ||
| 179 | c3 = 0; | ||
| 180 | mul_add_c(a[2], b[7], c1, c2, c3); | ||
| 181 | mul_add_c(a[3], b[6], c1, c2, c3); | ||
| 182 | mul_add_c(a[4], b[5], c1, c2, c3); | ||
| 183 | mul_add_c(a[5], b[4], c1, c2, c3); | ||
| 184 | mul_add_c(a[6], b[3], c1, c2, c3); | ||
| 185 | mul_add_c(a[7], b[2], c1, c2, c3); | ||
| 186 | r[9] = c1; | ||
| 187 | c1 = 0; | ||
| 188 | mul_add_c(a[7], b[3], c2, c3, c1); | ||
| 189 | mul_add_c(a[6], b[4], c2, c3, c1); | ||
| 190 | mul_add_c(a[5], b[5], c2, c3, c1); | ||
| 191 | mul_add_c(a[4], b[6], c2, c3, c1); | ||
| 192 | mul_add_c(a[3], b[7], c2, c3, c1); | ||
| 193 | r[10] = c2; | ||
| 194 | c2 = 0; | ||
| 195 | mul_add_c(a[4], b[7], c3, c1, c2); | ||
| 196 | mul_add_c(a[5], b[6], c3, c1, c2); | ||
| 197 | mul_add_c(a[6], b[5], c3, c1, c2); | ||
| 198 | mul_add_c(a[7], b[4], c3, c1, c2); | ||
| 199 | r[11] = c3; | ||
| 200 | c3 = 0; | ||
| 201 | mul_add_c(a[7], b[5], c1, c2, c3); | ||
| 202 | mul_add_c(a[6], b[6], c1, c2, c3); | ||
| 203 | mul_add_c(a[5], b[7], c1, c2, c3); | ||
| 204 | r[12] = c1; | ||
| 205 | c1 = 0; | ||
| 206 | mul_add_c(a[6], b[7], c2, c3, c1); | ||
| 207 | mul_add_c(a[7], b[6], c2, c3, c1); | ||
| 208 | r[13] = c2; | ||
| 209 | c2 = 0; | ||
| 210 | mul_add_c(a[7], b[7], c3, c1, c2); | ||
| 211 | r[14] = c3; | ||
| 212 | r[15] = c1; | ||
| 213 | } | ||
| 214 | #endif | ||
| 215 | |||
| 67 | #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS) | 216 | #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS) |
| 68 | /* | 217 | /* |
| 69 | * Here follows a specialised variant of bn_sub_words(), which has the property | 218 | * Here follows a specialised variant of bn_sub_words(), which has the property |
diff --git a/src/lib/libcrypto/bn/bn_sqr.c b/src/lib/libcrypto/bn/bn_sqr.c index 56ea378527..02b87556d4 100644 --- a/src/lib/libcrypto/bn/bn_sqr.c +++ b/src/lib/libcrypto/bn/bn_sqr.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_sqr.c,v 1.18 2023/01/16 17:56:25 jsing Exp $ */ | 1 | /* $OpenBSD: bn_sqr.c,v 1.19 2023/01/20 17:31:52 jsing Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -59,8 +59,123 @@ | |||
| 59 | #include <stdio.h> | 59 | #include <stdio.h> |
| 60 | #include <string.h> | 60 | #include <string.h> |
| 61 | 61 | ||
| 62 | #include "bn_arch.h" | ||
| 62 | #include "bn_local.h" | 63 | #include "bn_local.h" |
| 63 | 64 | ||
| 65 | #ifndef HAVE_BN_SQR_COMBA4 | ||
| 66 | void | ||
| 67 | bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) | ||
| 68 | { | ||
| 69 | BN_ULONG c1, c2, c3; | ||
| 70 | |||
| 71 | c1 = 0; | ||
| 72 | c2 = 0; | ||
| 73 | c3 = 0; | ||
| 74 | sqr_add_c(a, 0, c1, c2, c3); | ||
| 75 | r[0] = c1; | ||
| 76 | c1 = 0; | ||
| 77 | sqr_add_c2(a, 1, 0, c2, c3, c1); | ||
| 78 | r[1] = c2; | ||
| 79 | c2 = 0; | ||
| 80 | sqr_add_c(a, 1, c3, c1, c2); | ||
| 81 | sqr_add_c2(a, 2, 0, c3, c1, c2); | ||
| 82 | r[2] = c3; | ||
| 83 | c3 = 0; | ||
| 84 | sqr_add_c2(a, 3, 0, c1, c2, c3); | ||
| 85 | sqr_add_c2(a, 2, 1, c1, c2, c3); | ||
| 86 | r[3] = c1; | ||
| 87 | c1 = 0; | ||
| 88 | sqr_add_c(a, 2, c2, c3, c1); | ||
| 89 | sqr_add_c2(a, 3, 1, c2, c3, c1); | ||
| 90 | r[4] = c2; | ||
| 91 | c2 = 0; | ||
| 92 | sqr_add_c2(a, 3, 2, c3, c1, c2); | ||
| 93 | r[5] = c3; | ||
| 94 | c3 = 0; | ||
| 95 | sqr_add_c(a, 3, c1, c2, c3); | ||
| 96 | r[6] = c1; | ||
| 97 | r[7] = c2; | ||
| 98 | } | ||
| 99 | #endif | ||
| 100 | |||
| 101 | #ifndef HAVE_BN_SQR_COMBA8 | ||
| 102 | void | ||
| 103 | bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | ||
| 104 | { | ||
| 105 | BN_ULONG c1, c2, c3; | ||
| 106 | |||
| 107 | c1 = 0; | ||
| 108 | c2 = 0; | ||
| 109 | c3 = 0; | ||
| 110 | sqr_add_c(a, 0, c1, c2, c3); | ||
| 111 | r[0] = c1; | ||
| 112 | c1 = 0; | ||
| 113 | sqr_add_c2(a, 1, 0, c2, c3, c1); | ||
| 114 | r[1] = c2; | ||
| 115 | c2 = 0; | ||
| 116 | sqr_add_c(a, 1, c3, c1, c2); | ||
| 117 | sqr_add_c2(a, 2, 0, c3, c1, c2); | ||
| 118 | r[2] = c3; | ||
| 119 | c3 = 0; | ||
| 120 | sqr_add_c2(a, 3, 0, c1, c2, c3); | ||
| 121 | sqr_add_c2(a, 2, 1, c1, c2, c3); | ||
| 122 | r[3] = c1; | ||
| 123 | c1 = 0; | ||
| 124 | sqr_add_c(a, 2, c2, c3, c1); | ||
| 125 | sqr_add_c2(a, 3, 1, c2, c3, c1); | ||
| 126 | sqr_add_c2(a, 4, 0, c2, c3, c1); | ||
| 127 | r[4] = c2; | ||
| 128 | c2 = 0; | ||
| 129 | sqr_add_c2(a, 5, 0, c3, c1, c2); | ||
| 130 | sqr_add_c2(a, 4, 1, c3, c1, c2); | ||
| 131 | sqr_add_c2(a, 3, 2, c3, c1, c2); | ||
| 132 | r[5] = c3; | ||
| 133 | c3 = 0; | ||
| 134 | sqr_add_c(a, 3, c1, c2, c3); | ||
| 135 | sqr_add_c2(a, 4, 2, c1, c2, c3); | ||
| 136 | sqr_add_c2(a, 5, 1, c1, c2, c3); | ||
| 137 | sqr_add_c2(a, 6, 0, c1, c2, c3); | ||
| 138 | r[6] = c1; | ||
| 139 | c1 = 0; | ||
| 140 | sqr_add_c2(a, 7, 0, c2, c3, c1); | ||
| 141 | sqr_add_c2(a, 6, 1, c2, c3, c1); | ||
| 142 | sqr_add_c2(a, 5, 2, c2, c3, c1); | ||
| 143 | sqr_add_c2(a, 4, 3, c2, c3, c1); | ||
| 144 | r[7] = c2; | ||
| 145 | c2 = 0; | ||
| 146 | sqr_add_c(a, 4, c3, c1, c2); | ||
| 147 | sqr_add_c2(a, 5, 3, c3, c1, c2); | ||
| 148 | sqr_add_c2(a, 6, 2, c3, c1, c2); | ||
| 149 | sqr_add_c2(a, 7, 1, c3, c1, c2); | ||
| 150 | r[8] = c3; | ||
| 151 | c3 = 0; | ||
| 152 | sqr_add_c2(a, 7, 2, c1, c2, c3); | ||
| 153 | sqr_add_c2(a, 6, 3, c1, c2, c3); | ||
| 154 | sqr_add_c2(a, 5, 4, c1, c2, c3); | ||
| 155 | r[9] = c1; | ||
| 156 | c1 = 0; | ||
| 157 | sqr_add_c(a, 5, c2, c3, c1); | ||
| 158 | sqr_add_c2(a, 6, 4, c2, c3, c1); | ||
| 159 | sqr_add_c2(a, 7, 3, c2, c3, c1); | ||
| 160 | r[10] = c2; | ||
| 161 | c2 = 0; | ||
| 162 | sqr_add_c2(a, 7, 4, c3, c1, c2); | ||
| 163 | sqr_add_c2(a, 6, 5, c3, c1, c2); | ||
| 164 | r[11] = c3; | ||
| 165 | c3 = 0; | ||
| 166 | sqr_add_c(a, 6, c1, c2, c3); | ||
| 167 | sqr_add_c2(a, 7, 5, c1, c2, c3); | ||
| 168 | r[12] = c1; | ||
| 169 | c1 = 0; | ||
| 170 | sqr_add_c2(a, 7, 6, c2, c3, c1); | ||
| 171 | r[13] = c2; | ||
| 172 | c2 = 0; | ||
| 173 | sqr_add_c(a, 7, c3, c1, c2); | ||
| 174 | r[14] = c3; | ||
| 175 | r[15] = c1; | ||
| 176 | } | ||
| 177 | #endif | ||
| 178 | |||
| 64 | /* tmp must have 2*n words */ | 179 | /* tmp must have 2*n words */ |
| 65 | void | 180 | void |
| 66 | bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp) | 181 | bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp) |
