diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2021-04-27 13:09:44 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2021-04-27 13:09:44 +0200 |
commit | 646e85629497ee364e97101de4402d7141919144 (patch) | |
tree | 86913653cb8f3552f96061fa24360b6265bdd25a /networking/tls_sp_c32.c | |
parent | 48a18d15dfbfa41d137802e811a3abdfef012ac8 (diff) | |
download | busybox-w32-646e85629497ee364e97101de4402d7141919144.tar.gz busybox-w32-646e85629497ee364e97101de4402d7141919144.tar.bz2 busybox-w32-646e85629497ee364e97101de4402d7141919144.zip |
tls: shrink sp_256_mod_mul_norm_10
function old new delta
sp_256_mod_mul_norm_10 1439 1405 -34
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'networking/tls_sp_c32.c')
-rw-r--r-- | networking/tls_sp_c32.c | 145 |
1 file changed, 85 insertions, 60 deletions
diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c index f9c66b186..c5e887aad 100644 --- a/networking/tls_sp_c32.c +++ b/networking/tls_sp_c32.c | |||
@@ -460,51 +460,90 @@ static void sp_256_mont_inv_10(sp_digit* r, sp_digit* a) | |||
460 | static void sp_256_mod_mul_norm_10(sp_digit* r, const sp_digit* a) | 460 | static void sp_256_mod_mul_norm_10(sp_digit* r, const sp_digit* a) |
461 | { | 461 | { |
462 | int64_t t[8]; | 462 | int64_t t[8]; |
463 | int64_t a32[8]; | ||
464 | int64_t o; | 463 | int64_t o; |
465 | 464 | uint32_t a32; | |
466 | a32[0] = a[0]; | ||
467 | a32[0] |= a[1] << 26; | ||
468 | a32[0] &= 0xffffffff; | ||
469 | a32[1] = (sp_digit)(a[1] >> 6); | ||
470 | a32[1] |= a[2] << 20; | ||
471 | a32[1] &= 0xffffffff; | ||
472 | a32[2] = (sp_digit)(a[2] >> 12); | ||
473 | a32[2] |= a[3] << 14; | ||
474 | a32[2] &= 0xffffffff; | ||
475 | a32[3] = (sp_digit)(a[3] >> 18); | ||
476 | a32[3] |= a[4] << 8; | ||
477 | a32[3] &= 0xffffffff; | ||
478 | a32[4] = (sp_digit)(a[4] >> 24); | ||
479 | a32[4] |= a[5] << 2; | ||
480 | a32[4] |= a[6] << 28; | ||
481 | a32[4] &= 0xffffffff; | ||
482 | a32[5] = (sp_digit)(a[6] >> 4); | ||
483 | a32[5] |= a[7] << 22; | ||
484 | a32[5] &= 0xffffffff; | ||
485 | a32[6] = (sp_digit)(a[7] >> 10); | ||
486 | a32[6] |= a[8] << 16; | ||
487 | a32[6] &= 0xffffffff; | ||
488 | a32[7] = (sp_digit)(a[8] >> 16); | ||
489 | a32[7] |= a[9] << 10; | ||
490 | a32[7] &= 0xffffffff; | ||
491 | 465 | ||
492 | /* 1 1 0 -1 -1 -1 -1 0 */ | 466 | /* 1 1 0 -1 -1 -1 -1 0 */ |
493 | t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6]; | ||
494 | /* 0 1 1 0 -1 -1 -1 -1 */ | 467 | /* 0 1 1 0 -1 -1 -1 -1 */ |
495 | t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7]; | ||
496 | /* 0 0 1 1 0 -1 -1 -1 */ | 468 | /* 0 0 1 1 0 -1 -1 -1 */ |
497 | t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7]; | ||
498 | /* -1 -1 0 2 2 1 0 -1 */ | 469 | /* -1 -1 0 2 2 1 0 -1 */ |
499 | t[3] = 0 - a32[0] - a32[1] + 2 * a32[3] + 2 * a32[4] + a32[5] - a32[7]; | ||
500 | /* 0 -1 -1 0 2 2 1 0 */ | 470 | /* 0 -1 -1 0 2 2 1 0 */ |
501 | t[4] = 0 - a32[1] - a32[2] + 2 * a32[4] + 2 * a32[5] + a32[6]; | ||
502 | /* 0 0 -1 -1 0 2 2 1 */ | 471 | /* 0 0 -1 -1 0 2 2 1 */ |
503 | t[5] = 0 - a32[2] - a32[3] + 2 * a32[5] + 2 * a32[6] + a32[7]; | ||
504 | /* -1 -1 0 0 0 1 3 2 */ | 472 | /* -1 -1 0 0 0 1 3 2 */ |
505 | t[6] = 0 - a32[0] - a32[1] + a32[5] + 3 * a32[6] + 2 * a32[7]; | ||
506 | /* 1 0 -1 -1 -1 -1 0 3 */ | 473 | /* 1 0 -1 -1 -1 -1 0 3 */ |
507 | t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3 * a32[7]; | 474 | // t[] should be calculated from "a" (converted from 26-bit to 32-bit vector a32[8]) |
475 | // according to the above matrix: | ||
476 | //t[0] = 0 + a32[0] + a32[1] - a32[3] - a32[4] - a32[5] - a32[6] ; | ||
477 | //t[1] = 0 + a32[1] + a32[2] - a32[4] - a32[5] - a32[6] - a32[7] ; | ||
478 | //t[2] = 0 + a32[2] + a32[3] - a32[5] - a32[6] - a32[7] ; | ||
479 | //t[3] = 0 - a32[0] - a32[1] + 2*a32[3] + 2*a32[4] + a32[5] - a32[7] ; | ||
480 | //t[4] = 0 - a32[1] - a32[2] + 2*a32[4] + 2*a32[5] + a32[6] ; | ||
481 | //t[5] = 0 - a32[2] - a32[3] + 2*a32[5] + 2*a32[6] + a32[7] ; | ||
482 | //t[6] = 0 - a32[0] - a32[1] + a32[5] + 3*a32[6] + 2*a32[7]; | ||
483 | //t[7] = 0 + a32[0] - a32[2] - a32[3] - a32[4] - a32[5] + 3*a32[7]; | ||
484 | // We can do it "piecemeal" after each a32[i] is known, no need to store entire a32[8] vector: | ||
485 | |||
486 | #define A32 (int64_t)a32 | ||
487 | a32 = a[0] | (a[1] << 26); | ||
488 | t[0] = 0 + A32; | ||
489 | t[3] = 0 - A32; | ||
490 | t[6] = 0 - A32; | ||
491 | t[7] = 0 + A32; | ||
492 | |||
493 | a32 = (a[1] >> 6) | (a[2] << 20); | ||
494 | t[0] += A32 ; | ||
495 | t[1] = 0 + A32; | ||
496 | t[3] -= A32 ; | ||
497 | t[4] = 0 - A32; | ||
498 | t[6] -= A32 ; | ||
499 | |||
500 | a32 = (a[2] >> 12) | (a[3] << 14); | ||
501 | t[1] += A32 ; | ||
502 | t[2] = 0 + A32; | ||
503 | t[4] -= A32 ; | ||
504 | t[5] = 0 - A32; | ||
505 | t[7] -= A32 ; | ||
506 | |||
507 | a32 = (a[3] >> 18) | (a[4] << 8); | ||
508 | t[0] -= A32 ; | ||
509 | t[2] += A32 ; | ||
510 | t[3] += 2*A32; | ||
511 | t[5] -= A32 ; | ||
512 | t[7] -= A32 ; | ||
513 | |||
514 | a32 = (a[4] >> 24) | (a[5] << 2) | (a[6] << 28); | ||
515 | t[0] -= A32 ; | ||
516 | t[1] -= A32 ; | ||
517 | t[3] += 2*A32; | ||
518 | t[4] += 2*A32; | ||
519 | t[7] -= A32 ; | ||
520 | |||
521 | a32 = (a[6] >> 4) | (a[7] << 22); | ||
522 | t[0] -= A32 ; | ||
523 | t[1] -= A32 ; | ||
524 | t[2] -= A32 ; | ||
525 | t[3] += A32 ; | ||
526 | t[4] += 2*A32; | ||
527 | t[5] += 2*A32; | ||
528 | t[6] += A32 ; | ||
529 | t[7] -= A32 ; | ||
530 | |||
531 | a32 = (a[7] >> 10) | (a[8] << 16); | ||
532 | t[0] -= A32 ; | ||
533 | t[1] -= A32 ; | ||
534 | t[2] -= A32 ; | ||
535 | t[4] += A32 ; | ||
536 | t[5] += 2*A32; | ||
537 | t[6] += 3*A32; | ||
538 | |||
539 | a32 = (a[8] >> 16) | (a[9] << 10); | ||
540 | t[1] -= A32 ; | ||
541 | t[2] -= A32 ; | ||
542 | t[3] -= A32 ; | ||
543 | t[5] += A32 ; | ||
544 | t[6] += 2*A32; | ||
545 | t[7] += 3*A32; | ||
546 | #undef A32 | ||
508 | 547 | ||
509 | t[1] += t[0] >> 32; t[0] &= 0xffffffff; | 548 | t[1] += t[0] >> 32; t[0] &= 0xffffffff; |
510 | t[2] += t[1] >> 32; t[1] &= 0xffffffff; | 549 | t[2] += t[1] >> 32; t[1] &= 0xffffffff; |
@@ -526,30 +565,16 @@ static void sp_256_mod_mul_norm_10(sp_digit* r, const sp_digit* a) | |||
526 | t[6] += t[5] >> 32; t[5] &= 0xffffffff; | 565 | t[6] += t[5] >> 32; t[5] &= 0xffffffff; |
527 | t[7] += t[6] >> 32; t[6] &= 0xffffffff; | 566 | t[7] += t[6] >> 32; t[6] &= 0xffffffff; |
528 | 567 | ||
529 | r[0] = (sp_digit)(t[0]) & 0x3ffffff; | 568 | r[0] = 0x3ffffff & ((sp_digit)(t[0])); |
530 | r[1] = (sp_digit)(t[0] >> 26); | 569 | r[1] = 0x3ffffff & ((sp_digit)(t[0] >> 26) | ((sp_digit)t[1] << 6)); |
531 | r[1] |= t[1] << 6; | 570 | r[2] = 0x3ffffff & ((sp_digit)(t[1] >> 20) | ((sp_digit)t[2] << 12)); |
532 | r[1] &= 0x3ffffff; | 571 | r[3] = 0x3ffffff & ((sp_digit)(t[2] >> 14) | ((sp_digit)t[3] << 18)); |
533 | r[2] = (sp_digit)(t[1] >> 20); | 572 | r[4] = 0x3ffffff & ((sp_digit)(t[3] >> 8) | ((sp_digit)t[4] << 24)); |
534 | r[2] |= t[2] << 12; | 573 | r[5] = 0x3ffffff & ((sp_digit)t[4] >> 2); /* small shift, ok to cast t[4] to narrower type */ |
535 | r[2] &= 0x3ffffff; | 574 | r[6] = 0x3ffffff & ((sp_digit)(t[4] >> 28) | ((sp_digit)t[5] << 4)); |
536 | r[3] = (sp_digit)(t[2] >> 14); | 575 | r[7] = 0x3ffffff & ((sp_digit)(t[5] >> 22) | ((sp_digit)t[6] << 10)); |
537 | r[3] |= t[3] << 18; | 576 | r[8] = 0x3ffffff & ((sp_digit)(t[6] >> 16) | ((sp_digit)t[7] << 16)); |
538 | r[3] &= 0x3ffffff; | 577 | r[9] = ((sp_digit)(t[7] >> 10)); |
539 | r[4] = (sp_digit)(t[3] >> 8); | ||
540 | r[4] |= t[4] << 24; | ||
541 | r[4] &= 0x3ffffff; | ||
542 | r[5] = (sp_digit)(t[4] >> 2) & 0x3ffffff; | ||
543 | r[6] = (sp_digit)(t[4] >> 28); | ||
544 | r[6] |= t[5] << 4; | ||
545 | r[6] &= 0x3ffffff; | ||
546 | r[7] = (sp_digit)(t[5] >> 22); | ||
547 | r[7] |= t[6] << 10; | ||
548 | r[7] &= 0x3ffffff; | ||
549 | r[8] = (sp_digit)(t[6] >> 16); | ||
550 | r[8] |= t[7] << 16; | ||
551 | r[8] &= 0x3ffffff; | ||
552 | r[9] = (sp_digit)(t[7] >> 10); | ||
553 | } | 578 | } |
554 | 579 | ||
555 | /* Map the Montgomery form projective co-ordinate point to an affine point. | 580 | /* Map the Montgomery form projective co-ordinate point to an affine point. |
@@ -795,7 +820,7 @@ static void sp_256_ecc_mulmod_base_10(sp_point* r, sp_digit* k /*, int map*/) | |||
795 | 0x6b,0x17,0xd1,0xf2,0xe1,0x2c,0x42,0x47,0xf8,0xbc,0xe6,0xe5,0x63,0xa4,0x40,0xf2,0x77,0x03,0x7d,0x81,0x2d,0xeb,0x33,0xa0,0xf4,0xa1,0x39,0x45,0xd8,0x98,0xc2,0x96, | 820 | 0x6b,0x17,0xd1,0xf2,0xe1,0x2c,0x42,0x47,0xf8,0xbc,0xe6,0xe5,0x63,0xa4,0x40,0xf2,0x77,0x03,0x7d,0x81,0x2d,0xeb,0x33,0xa0,0xf4,0xa1,0x39,0x45,0xd8,0x98,0xc2,0x96, |
796 | /* y */ | 821 | /* y */ |
797 | 0x4f,0xe3,0x42,0xe2,0xfe,0x1a,0x7f,0x9b,0x8e,0xe7,0xeb,0x4a,0x7c,0x0f,0x9e,0x16,0x2b,0xce,0x33,0x57,0x6b,0x31,0x5e,0xce,0xcb,0xb6,0x40,0x68,0x37,0xbf,0x51,0xf5, | 822 | 0x4f,0xe3,0x42,0xe2,0xfe,0x1a,0x7f,0x9b,0x8e,0xe7,0xeb,0x4a,0x7c,0x0f,0x9e,0x16,0x2b,0xce,0x33,0x57,0x6b,0x31,0x5e,0xce,0xcb,0xb6,0x40,0x68,0x37,0xbf,0x51,0xf5, |
798 | /* z will be set to 0, infinity flag to "false" */ | 823 | /* z will be set to 1, infinity flag to "false" */ |
799 | }; | 824 | }; |
800 | sp_point p256_base; | 825 | sp_point p256_base; |
801 | 826 | ||