diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2021-10-06 10:15:29 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2021-10-06 10:15:29 +0200 |
commit | 00f2cceb6aa194aadcbe70675a0f0a0660aea233 (patch) | |
tree | 6d570d98453b6a54d655e37a3e2064dfa1fe357a | |
parent | c78428461513afed5e3bf272bcbf17964cbd61a3 (diff) | |
download | busybox-w32-00f2cceb6aa194aadcbe70675a0f0a0660aea233.tar.gz busybox-w32-00f2cceb6aa194aadcbe70675a0f0a0660aea233.tar.bz2 busybox-w32-00f2cceb6aa194aadcbe70675a0f0a0660aea233.zip |
tls: P256: shrink sp_256_mul_add_8 a bit more
function old new delta
sp_256_mont_reduce_8 257 245 -12
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | networking/tls_sp_c32.c | 38 |
1 file changed, 24 insertions, 14 deletions
diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 1ab6106a7..6fca2aad8 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -569,8 +569,10 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/) | |||
569 | // const sp_digit* a = p256_mod; | 569 | // const sp_digit* a = p256_mod; |
570 | //a[7..0] = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff | 570 | //a[7..0] = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff |
571 | sp_digit b = r[0]; | 571 | sp_digit b = r[0]; |
572 | uint64_t t = 0; | ||
573 | 572 | ||
573 | uint64_t t; | ||
574 | |||
575 | // t = 0; | ||
574 | // for (i = 0; i < 8; i++) { | 576 | // for (i = 0; i < 8; i++) { |
575 | // uint32_t t_hi; | 577 | // uint32_t t_hi; |
576 | // uint64_t m = ((uint64_t)b * a[i]) + r[i]; | 578 | // uint64_t m = ((uint64_t)b * a[i]) + r[i]; |
@@ -584,12 +586,13 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/) | |||
584 | // Unroll, then optimize the above loop: | 586 | // Unroll, then optimize the above loop: |
585 | //uint32_t t_hi; | 587 | //uint32_t t_hi; |
586 | uint64_t m; | 588 | uint64_t m; |
589 | uint32_t t32; | ||
587 | 590 | ||
588 | //m = ((uint64_t)b * a[0]) + r[0]; | 591 | //m = ((uint64_t)b * a[0]) + r[0]; |
589 | // Since b is r[0] and a[0] is ffffffff, the above optimizes to: | 592 | // Since b is r[0] and a[0] is ffffffff, the above optimizes to: |
590 | // m = r[0] * ffffffff + r[0] = (r[0] * 100000000 - r[0]) + r[0] = r[0] << 32; | 593 | // m = r[0] * ffffffff + r[0] = (r[0] * 100000000 - r[0]) + r[0] = r[0] << 32; |
591 | //t += m; | 594 | //t += m; |
592 | // t = (uint64_t)r[0] << 32; | 595 | // t = r[0] << 32 = b << 32; |
593 | //t_hi = (t < m); | 596 | //t_hi = (t < m); |
594 | // t_hi = 0; | 597 | // t_hi = 0; |
595 | //r[0] = (sp_digit)t; | 598 | //r[0] = (sp_digit)t; |
@@ -625,42 +628,49 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/) | |||
625 | // Since a[3] is 00000000, the above optimizes to: | 628 | // Since a[3] is 00000000, the above optimizes to: |
626 | // m = b * 0 + r[3] = r[3]; | 629 | // m = b * 0 + r[3] = r[3]; |
627 | //t += m; | 630 | //t += m; |
628 | // t += r[3]; | 631 | // t = b + r[3]; |
629 | //t_hi = (t < m); | 632 | //t_hi = (t < m); |
630 | // t_hi = 0; | 633 | // t_hi = 0; |
631 | //r[3] = (sp_digit)t; | 634 | //r[3] = (sp_digit)t; |
632 | r[3] = r[3] + b; | 635 | r[3] = r[3] + b; |
633 | //t = (t >> 32) | ((uint64_t)t_hi << 32); | 636 | //t = (t >> 32) | ((uint64_t)t_hi << 32); |
634 | t = (r[3] < b); | 637 | t32 = (r[3] < b); // 0 or 1 |
635 | 638 | ||
636 | //m = ((uint64_t)b * a[4]) + r[4]; | 639 | //m = ((uint64_t)b * a[4]) + r[4]; |
637 | // Since a[4] is 00000000, the above optimizes to: | 640 | // Since a[4] is 00000000, the above optimizes to: |
638 | // m = b * 0 + r[4] = r[4]; | 641 | // m = b * 0 + r[4] = r[4]; |
639 | //t += m; | 642 | //t += m; |
640 | t += r[4]; | 643 | // t = t32 + r[4]; |
641 | //t_hi = (t < m); | 644 | //t_hi = (t < m); |
642 | // t_hi = 0; | 645 | // t_hi = 0; |
643 | r[4] = (sp_digit)t; | 646 | //r[4] = (sp_digit)t; |
644 | //t = (t >> 32) | ((uint64_t)t_hi << 32); | 647 | //t = (t >> 32) | ((uint64_t)t_hi << 32); |
645 | t = (t >> 32); | 648 | if (t32 != 0) { |
649 | r[4]++; | ||
650 | t32 = (r[4] == 0); // 0 or 1 | ||
646 | 651 | ||
647 | //m = ((uint64_t)b * a[5]) + r[5]; | 652 | //m = ((uint64_t)b * a[5]) + r[5]; |
648 | // Since a[5] is 00000000, the above optimizes to: | 653 | // Since a[5] is 00000000, the above optimizes to: |
649 | // m = b * 0 + r[5] = r[5]; | 654 | // m = b * 0 + r[5] = r[5]; |
650 | //t += m; | 655 | //t += m; |
651 | t += r[5]; | 656 | // t = t32 + r[5]; (t32 is 0 or 1) |
652 | //t_hi = (t < m); | 657 | //t_hi = (t < m); |
653 | // t_hi = 0; | 658 | // t_hi = 0; |
654 | r[5] = (sp_digit)t; | 659 | //r[5] = (sp_digit)t; |
655 | //t = (t >> 32) | ((uint64_t)t_hi << 32); | 660 | //t = (t >> 32) | ((uint64_t)t_hi << 32); |
656 | t = (t >> 32); | 661 | if (t32 != 0) { |
662 | r[5]++; | ||
663 | t32 = (r[5] == 0); // 0 or 1 | ||
664 | } | ||
665 | } | ||
657 | 666 | ||
658 | //m = ((uint64_t)b * a[6]) + r[6]; | 667 | //m = ((uint64_t)b * a[6]) + r[6]; |
659 | // Since a[6] is 00000001, the above optimizes to: | 668 | // Since a[6] is 00000001, the above optimizes to: |
660 | m = (uint64_t)b + r[6]; // 33 bits at most | 669 | // m = (uint64_t)b + r[6]; // 33 bits at most |
661 | t += m; | 670 | //t += m; |
671 | t = t32 + (uint64_t)b + r[6]; | ||
662 | //t_hi = (t < m); | 672 | //t_hi = (t < m); |
663 | // t_hi = 0; //32bit_value + 33bit_value can't overflow 64 bits | 673 | // t_hi = 0; |
664 | r[6] = (sp_digit)t; | 674 | r[6] = (sp_digit)t; |
665 | //t = (t >> 32) | ((uint64_t)t_hi << 32); | 675 | //t = (t >> 32) | ((uint64_t)t_hi << 32); |
666 | t = (t >> 32); | 676 | t = (t >> 32); |
@@ -671,7 +681,7 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/) | |||
671 | m = ((uint64_t)b << 32) - b + r[7]; | 681 | m = ((uint64_t)b << 32) - b + r[7]; |
672 | t += m; | 682 | t += m; |
673 | //t_hi = (t < m); | 683 | //t_hi = (t < m); |
674 | // t_hi in fact is always 0 here | 684 | // t_hi in fact is always 0 here (256bit * 32bit can't have more than 32 bits of overflow) |
675 | r[7] = (sp_digit)t; | 685 | r[7] = (sp_digit)t; |
676 | //t = (t >> 32) | ((uint64_t)t_hi << 32); | 686 | //t = (t >> 32) | ((uint64_t)t_hi << 32); |
677 | t = (t >> 32); | 687 | t = (t >> 32); |