author     Denys Vlasenko <vda.linux@googlemail.com>    2021-10-06 20:14:49 +0200
committer  Denys Vlasenko <vda.linux@googlemail.com>    2021-10-06 20:19:30 +0200
commit     5e9c6170218826dded581b99dfd225b0c76c6c86
tree       e74ba5e448720883e342c7dc19abc99a39d41a4d /networking
parent     87e3f2e9f8a1c99b223b316fbefb5ae49c2a8fe2
tls: P256: sp_256_sub_8_p256_mod always subtracts in-place, use that
i386:
function old new delta
sp_256_mont_reduce_8 245 243 -2
sp_256_mont_dbl_8 26 24 -2
sp_256_ecc_mulmod_8 1161 1157 -4
sp_256_proj_point_dbl_8 359 353 -6
sp_256_sub_8_p256_mod 71 32 -39
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/5 up/down: 0/-53) Total: -53 bytes
non-asm code:
function old new delta
sp_256_sub_8_p256_mod - 12 +12
sp_256_mont_reduce_8 250 243 -7
sp_256_mont_dbl_8 31 24 -7
sp_256_ecc_mulmod_8 1171 1157 -14
sp_256_proj_point_dbl_8 374 353 -21
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/4 up/down: 12/-49) Total: -37 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
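The patch turns sp_256_sub_8_p256_mod() from a two-argument helper computing r = a - p256_mod into a one-argument helper that subtracts the fixed P-256 modulus from r in place; every caller already passed the same buffer for both arguments. For orientation only, a portable equivalent of what the helper does could look like the sketch below. The limb values are the words of p256_mod quoted in the diff, but the names p256_limb and sub_p256_inplace are made up for illustration; the real generic fallback simply calls sp_256_sub_8(r, r, p256_mod) as shown in the diff.

#include <stdint.h>

/* Little-endian 32-bit limbs of the P-256 prime, matching the
 * "p256_mod[7..0] = ffffffff 00000001 ..." comment in the diff. */
static const uint32_t p256_limb[8] = {
	0xffffffff, 0xffffffff, 0xffffffff, 0x00000000,
	0x00000000, 0x00000000, 0x00000001, 0xffffffff,
};

/* In-place r -= p256_mod with borrow propagated across the 8 limbs. */
static void sub_p256_inplace(uint32_t r[8])
{
	uint64_t borrow = 0;
	int i;
	for (i = 0; i < 8; i++) {
		uint64_t d = (uint64_t)r[i] - p256_limb[i] - borrow;
		r[i] = (uint32_t)d;
		borrow = (d >> 32) & 1; /* 1 if this limb subtraction wrapped */
	}
}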
Diffstat (limited to 'networking')
-rw-r--r--   networking/tls_sp_c32.c | 99
1 file changed, 36 insertions, 63 deletions
diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 1391cb405..b3828d817 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -346,82 +346,55 @@ static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 #endif
 }
 
-/* Sub p256_mod from a into r. (r = a - p256_mod). */
+/* Sub p256_mod from r. (r = r - p256_mod). */
 #if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
-static void sp_256_sub_8_p256_mod(sp_digit* r, const sp_digit* a)
+static void sp_256_sub_8_p256_mod(sp_digit* r)
 {
-	sp_digit reg;
 //p256_mod[7..0] = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff
 	asm volatile (
-"\n	movl	(%0), %2"
-"\n	subl	$0xffffffff, %2"
-"\n	movl	%2, (%1)"
-"\n"
-"\n	movl	1*4(%0), %2"
-"\n	sbbl	$0xffffffff, %2"
-"\n	movl	%2, 1*4(%1)"
-"\n"
-"\n	movl	2*4(%0), %2"
-"\n	sbbl	$0xffffffff, %2"
-"\n	movl	%2, 2*4(%1)"
-"\n"
-"\n	movl	3*4(%0), %2"
-"\n	sbbl	$0, %2"
-"\n	movl	%2, 3*4(%1)"
-"\n"
-"\n	movl	4*4(%0), %2"
-"\n	sbbl	$0, %2"
-"\n	movl	%2, 4*4(%1)"
+"\n	subl	$0xffffffff, (%0)"
+"\n	sbbl	$0xffffffff, 1*4(%0)"
+"\n	sbbl	$0xffffffff, 2*4(%0)"
+"\n	sbbl	$0, 3*4(%0)"
+"\n	sbbl	$0, 4*4(%0)"
+"\n	sbbl	$0, 5*4(%0)"
+"\n	sbbl	$1, 6*4(%0)"
+"\n	sbbl	$0xffffffff, 7*4(%0)"
 "\n"
-"\n	movl	5*4(%0), %2"
-"\n	sbbl	$0, %2"
-"\n	movl	%2, 5*4(%1)"
-"\n"
-"\n	movl	6*4(%0), %2"
-"\n	sbbl	$1, %2"
-"\n	movl	%2, 6*4(%1)"
-"\n"
-"\n	movl	7*4(%0), %2"
-"\n	sbbl	$0xffffffff, %2"
-"\n	movl	%2, 7*4(%1)"
-"\n"
-	: "=r" (a), "=r" (r), "=r" (reg)
-	: "0" (a), "1" (r)
+	: "=r" (r)
+	: "0" (r)
 	: "memory"
 	);
 }
 #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
-static void sp_256_sub_8_p256_mod(sp_digit* r, const sp_digit* a)
+static void sp_256_sub_8_p256_mod(sp_digit* r)
 {
 	uint64_t reg;
 	uint64_t ooff;
 //p256_mod[3..0] = ffffffff00000001 0000000000000000 00000000ffffffff ffffffffffffffff
 	asm volatile (
-"\n	movq	(%0), %3"
-"\n	addq	$1, %3"	// adding 1 is the same as subtracting ffffffffffffffff
-"\n	movq	%3, (%1)"	//
+"\n	addq	$1, (%0)"	// adding 1 is the same as subtracting ffffffffffffffff
 "\n	cmc"	// only carry bit needs inverting
+
+"\n	sbbq	%1, 1*8(%0)"	// %1 holds 00000000ffffffff
+
+"\n	sbbq	$0, 2*8(%0)"
 "\n"
-"\n	movq	1*8(%0), %3"
-"\n	sbbq	%2, %3"	// %2 holds 00000000ffffffff
-"\n	movq	%3, 1*8(%1)"
-"\n"
-"\n	movq	2*8(%0), %3"
-"\n	sbbq	$0, %3"
-"\n	movq	%3, 2*8(%1)"
-"\n"
-"\n	movq	3*8(%0), %3"
-"\n	sbbq	$0, %3"	// adding 00000000ffffffff (in %2)
-"\n	addq	%2, %3"	// is the same as subtracting ffffffff00000001
-"\n	movq	%3, 3*8(%1)"
+"\n	movq	3*8(%0), %2"
+"\n	sbbq	$0, %2"	// adding 00000000ffffffff (in %1)
+"\n	addq	%1, %2"	// is the same as subtracting ffffffff00000001
+"\n	movq	%2, 3*8(%0)"
 "\n"
-	: "=r" (a), "=r" (r), "=r" (ooff), "=r" (reg)
-	: "0" (a), "1" (r), "2" (0x00000000ffffffff)
+	: "=r" (r), "=r" (ooff), "=r" (reg)
+	: "0" (r), "1" (0x00000000ffffffff)
 	: "memory"
 	);
 }
 #else
-# define sp_256_sub_8_p256_mod(r, a) sp_256_sub_8((r), (a), p256_mod)
+static void sp_256_sub_8_p256_mod(sp_digit* r)
+{
+	sp_256_sub_8(r, r, p256_mod);
+}
 #endif
 
 /* Multiply a and b into r. (r = a * b) */
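A note on the constant tricks visible in the hunk above: the i386 version folds the limbs of p256_mod directly into the sub/sbb immediates, and the x86_64 comments rely on two mod-2^64 identities (subtracting ffffffffffffffff is the same as adding 1, and subtracting ffffffff00000001 is the same as adding 00000000ffffffff), with the inter-limb borrow handled by cmc/sbb. A small standalone check of those identities, for illustration only and not part of the patch:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t a = 0x1234567890abcdefULL;	/* holds for any 64-bit a */

	/* a - ffffffffffffffff == a + 1 (mod 2^64) */
	assert(a - 0xffffffffffffffffULL == a + 1);

	/* a - ffffffff00000001 == a + 00000000ffffffff (mod 2^64) */
	assert(a - 0xffffffff00000001ULL == a + 0x00000000ffffffffULL);

	return 0;
}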
@@ -609,7 +582,7 @@ static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b
 	int carry = sp_256_add_8(r, a, b);
 	sp_256_norm_8(r);
 	if (carry) {
-		sp_256_sub_8_p256_mod(r, r /*, m*/);
+		sp_256_sub_8_p256_mod(r);
 		sp_256_norm_8(r);
 	}
 }
@@ -637,7 +610,7 @@ static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a /*, const sp_digit*
 	int carry = sp_256_add_8(r, a, a);
 	sp_256_norm_8(r);
 	if (carry)
-		sp_256_sub_8_p256_mod(r, r /*, m*/);
+		sp_256_sub_8_p256_mod(r);
 	sp_256_norm_8(r);
 }
 
@@ -649,13 +622,13 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a /*, const sp_digit*
 	int carry = sp_256_add_8(r, a, a);
 	sp_256_norm_8(r);
 	if (carry) {
-		sp_256_sub_8_p256_mod(r, r /*, m*/);
+		sp_256_sub_8_p256_mod(r);
 		sp_256_norm_8(r);
 	}
 	carry = sp_256_add_8(r, r, a);
 	sp_256_norm_8(r);
 	if (carry) {
-		sp_256_sub_8_p256_mod(r, r /*, m*/);
+		sp_256_sub_8_p256_mod(r);
 		sp_256_norm_8(r);
 	}
 }
@@ -829,7 +802,7 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/
 		}
 		sp_256_mont_shift_8(a, a);
 		if (word16th != 0)
-			sp_256_sub_8_p256_mod(a, a /*, m*/);
+			sp_256_sub_8_p256_mod(a);
 		sp_256_norm_8(a);
 	}
 	else { /* Same code for explicit mp == 1 (which is always the case for P256) */
@@ -849,7 +822,7 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/
 		}
 		sp_256_mont_shift_8(a, a);
 		if (word16th != 0)
-			sp_256_sub_8_p256_mod(a, a /*, m*/);
+			sp_256_sub_8_p256_mod(a);
 		sp_256_norm_8(a);
 	}
 }
@@ -1104,7 +1077,7 @@ static void sp_256_map_8(sp_point* r, sp_point* p)
 	sp_256_mont_reduce_8(r->x /*, p256_mod, p256_mp_mod*/);
 	/* Reduce x to less than modulus */
 	if (sp_256_cmp_8(r->x, p256_mod) >= 0)
-		sp_256_sub_8_p256_mod(r->x, r->x /*, p256_mod*/);
+		sp_256_sub_8_p256_mod(r->x);
 	sp_256_norm_8(r->x);
 
 	/* y /= z^3 */
@@ -1113,7 +1086,7 @@ static void sp_256_map_8(sp_point* r, sp_point* p)
 	sp_256_mont_reduce_8(r->y /*, p256_mod, p256_mp_mod*/);
 	/* Reduce y to less than modulus */
 	if (sp_256_cmp_8(r->y, p256_mod) >= 0)
-		sp_256_sub_8_p256_mod(r->y, r->y /*, p256_mod*/);
+		sp_256_sub_8_p256_mod(r->y);
 	sp_256_norm_8(r->y);
 
 	memset(r->z, 0, sizeof(r->z));