diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2021-10-06 01:09:37 +0200 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2021-10-06 01:11:48 +0200 |
commit | c78428461513afed5e3bf272bcbf17964cbd61a3 (patch) | |
tree | f8edb17e335ce89508c297e3450587c75425530d | |
parent | 2430fcfd8de47f786aca1185ae0500fa36c6a548 (diff) | |
download | busybox-w32-c78428461513afed5e3bf272bcbf17964cbd61a3.tar.gz busybox-w32-c78428461513afed5e3bf272bcbf17964cbd61a3.tar.bz2 busybox-w32-c78428461513afed5e3bf272bcbf17964cbd61a3.zip |
tls: P256: propagate constants, create dedicated "subtract p256_mod" function
8 instances of this subtraction probably warrant a few bytes more of code.
function old new delta
sp_256_sub_8_p256_mod - 71 +71
sp_256_mont_sub_8 - 29 +29
sp_256_mont_dbl_8 - 26 +26
sp_256_mont_reduce_8 262 257 -5
sp_256_ecc_mulmod_8 1171 1161 -10
sp_256_proj_point_dbl_8 374 359 -15
static.sp_256_mont_sub_8 29 - -29
static.sp_256_mont_dbl_8 31 - -31
------------------------------------------------------------------------------
(add/remove: 3/2 grow/shrink: 0/3 up/down: 126/-90) Total: 36 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | networking/tls_sp_c32.c | 140 |
1 file changed, 108 insertions, 32 deletions
diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c index 0773a2d47..1ab6106a7 100644 --- a/networking/tls_sp_c32.c +++ b/networking/tls_sp_c32.c | |||
@@ -291,6 +291,74 @@ static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b) | |||
291 | #endif | 291 | #endif |
292 | } | 292 | } |
293 | 293 | ||
/* Sub p256_mod from a into r. (r = a - p256_mod).
 * Both a and r are 8-word (8 x 32-bit) little-endian limb arrays.
 * The subtraction is modulo 2^256: a final borrow out of the top word
 * is discarded (callers invoke this only when a >= p256_mod, or rely
 * on the wraparound; every visible call site normalizes r afterwards).
 */
static void sp_256_sub_8_p256_mod(sp_digit* r, const sp_digit* a)
{
#if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
	sp_digit reg; /* scratch register for the load/sub/store chain */
	//p256_mod[7..0] = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff
	/* Subtract each 32-bit word of p256_mod as an immediate, least
	 * significant word first; sbbl propagates the borrow between words.
	 * The immediates below are exactly the p256_mod words listed above.
	 */
	asm volatile (
"\n		movl	(%0), %2"
"\n		subl	$0xffffffff, %2"
"\n		movl	%2, (%1)"
"\n"
"\n		movl	1*4(%0), %2"
"\n		sbbl	$0xffffffff, %2"
"\n		movl	%2, 1*4(%1)"
"\n"
"\n		movl	2*4(%0), %2"
"\n		sbbl	$0xffffffff, %2"
"\n		movl	%2, 2*4(%1)"
"\n"
"\n		movl	3*4(%0), %2"
"\n		sbbl	$0, %2"
"\n		movl	%2, 3*4(%1)"
"\n"
"\n		movl	4*4(%0), %2"
"\n		sbbl	$0, %2"
"\n		movl	%2, 4*4(%1)"
"\n"
"\n		movl	5*4(%0), %2"
"\n		sbbl	$0, %2"
"\n		movl	%2, 5*4(%1)"
"\n"
"\n		movl	6*4(%0), %2"
"\n		sbbl	$1, %2"
"\n		movl	%2, 6*4(%1)"
"\n"
"\n		movl	7*4(%0), %2"
"\n		sbbl	$0xffffffff, %2"
"\n		movl	%2, 7*4(%1)"
"\n"
	/* a and r are declared as in/out operands ("0"/"1" matching
	 * constraints) so the compiler does not assume the registers
	 * holding them survive the asm; "memory" covers the stores to r.
	 */
	: "=r" (a), "=r" (r), "=r" (reg)
	: "0" (a), "1" (r)
	: "memory"
	);
#else
	/* Generic word-by-word subtract-with-borrow of p256_mod */
	const sp_digit* b = p256_mod;
	int i;
	sp_digit borrow;

	borrow = 0;
	for (i = 0; i < 8; i++) {
		sp_digit w, v;
		w = b[i] + borrow; /* can wrap to 0 only when b[i]=ffffffff, borrow=1 */
		v = a[i];
		if (w != 0) {
			v = a[i] - w;
			borrow = (v > a[i]);
			/* hope compiler detects above as "carry flag set" */
		}
		/* else: b + borrow == 0, two cases:
		 * b:ffffffff, borrow:1
		 * b:00000000, borrow:0
		 * in either case, r[i] = a[i] and borrow remains unchanged
		 */
		r[i] = v;
	}
#endif
}
361 | |||
294 | /* Multiply a and b into r. (r = a * b) */ | 362 | /* Multiply a and b into r. (r = a * b) */ |
295 | static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) | 363 | static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) |
296 | { | 364 | { |
@@ -425,21 +493,25 @@ static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m) | |||
425 | } | 493 | } |
426 | 494 | ||
427 | /* Add two Montgomery form numbers (r = a + b % m) */ | 495 | /* Add two Montgomery form numbers (r = a + b % m) */ |
428 | static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b, | 496 | static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b |
429 | const sp_digit* m) | 497 | /*, const sp_digit* m*/) |
430 | { | 498 | { |
499 | // const sp_digit* m = p256_mod; | ||
500 | |||
431 | int carry = sp_256_add_8(r, a, b); | 501 | int carry = sp_256_add_8(r, a, b); |
432 | sp_256_norm_8(r); | 502 | sp_256_norm_8(r); |
433 | if (carry) { | 503 | if (carry) { |
434 | sp_256_sub_8(r, r, m); | 504 | sp_256_sub_8_p256_mod(r, r /*, m*/); |
435 | sp_256_norm_8(r); | 505 | sp_256_norm_8(r); |
436 | } | 506 | } |
437 | } | 507 | } |
438 | 508 | ||
439 | /* Subtract two Montgomery form numbers (r = a - b % m) */ | 509 | /* Subtract two Montgomery form numbers (r = a - b % m) */ |
440 | static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, | 510 | static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b |
441 | const sp_digit* m) | 511 | /*, const sp_digit* m*/) |
442 | { | 512 | { |
513 | const sp_digit* m = p256_mod; | ||
514 | |||
443 | int borrow; | 515 | int borrow; |
444 | borrow = sp_256_sub_8(r, a, b); | 516 | borrow = sp_256_sub_8(r, a, b); |
445 | sp_256_norm_8(r); | 517 | sp_256_norm_8(r); |
@@ -450,28 +522,32 @@ static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, | |||
450 | } | 522 | } |
451 | 523 | ||
452 | /* Double a Montgomery form number (r = a + a % m) */ | 524 | /* Double a Montgomery form number (r = a + a % m) */ |
453 | static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) | 525 | static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a /*, const sp_digit* m*/) |
454 | { | 526 | { |
527 | // const sp_digit* m = p256_mod; | ||
528 | |||
455 | int carry = sp_256_add_8(r, a, a); | 529 | int carry = sp_256_add_8(r, a, a); |
456 | sp_256_norm_8(r); | 530 | sp_256_norm_8(r); |
457 | if (carry) | 531 | if (carry) |
458 | sp_256_sub_8(r, r, m); | 532 | sp_256_sub_8_p256_mod(r, r /*, m*/); |
459 | sp_256_norm_8(r); | 533 | sp_256_norm_8(r); |
460 | } | 534 | } |
461 | 535 | ||
462 | /* Triple a Montgomery form number (r = a + a + a % m) */ | 536 | /* Triple a Montgomery form number (r = a + a + a % m) */ |
463 | static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) | 537 | static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a /*, const sp_digit* m*/) |
464 | { | 538 | { |
539 | // const sp_digit* m = p256_mod; | ||
540 | |||
465 | int carry = sp_256_add_8(r, a, a); | 541 | int carry = sp_256_add_8(r, a, a); |
466 | sp_256_norm_8(r); | 542 | sp_256_norm_8(r); |
467 | if (carry) { | 543 | if (carry) { |
468 | sp_256_sub_8(r, r, m); | 544 | sp_256_sub_8_p256_mod(r, r /*, m*/); |
469 | sp_256_norm_8(r); | 545 | sp_256_norm_8(r); |
470 | } | 546 | } |
471 | carry = sp_256_add_8(r, r, a); | 547 | carry = sp_256_add_8(r, r, a); |
472 | sp_256_norm_8(r); | 548 | sp_256_norm_8(r); |
473 | if (carry) { | 549 | if (carry) { |
474 | sp_256_sub_8(r, r, m); | 550 | sp_256_sub_8_p256_mod(r, r /*, m*/); |
475 | sp_256_norm_8(r); | 551 | sp_256_norm_8(r); |
476 | } | 552 | } |
477 | } | 553 | } |
@@ -612,7 +688,7 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/) | |||
612 | */ | 688 | */ |
613 | static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/) | 689 | static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/) |
614 | { | 690 | { |
615 | const sp_digit* m = p256_mod; | 691 | // const sp_digit* m = p256_mod; |
616 | sp_digit mp = p256_mp_mod; | 692 | sp_digit mp = p256_mp_mod; |
617 | 693 | ||
618 | int i; | 694 | int i; |
@@ -635,13 +711,13 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/ | |||
635 | } | 711 | } |
636 | sp_256_mont_shift_8(a, a); | 712 | sp_256_mont_shift_8(a, a); |
637 | if (word16th != 0) | 713 | if (word16th != 0) |
638 | sp_256_sub_8(a, a, m); | 714 | sp_256_sub_8_p256_mod(a, a /*, m*/); |
639 | sp_256_norm_8(a); | 715 | sp_256_norm_8(a); |
640 | } | 716 | } |
641 | else { /* Same code for explicit mp == 1 (which is always the case for P256) */ | 717 | else { /* Same code for explicit mp == 1 (which is always the case for P256) */ |
642 | sp_digit word16th = 0; | 718 | sp_digit word16th = 0; |
643 | for (i = 0; i < 8; i++) { | 719 | for (i = 0; i < 8; i++) { |
644 | // mu = a[i]; | 720 | /*mu = a[i];*/ |
645 | if (sp_256_mul_add_8(a+i /*, m, mu*/)) { | 721 | if (sp_256_mul_add_8(a+i /*, m, mu*/)) { |
646 | int j = i + 8; | 722 | int j = i + 8; |
647 | inc_next_word: | 723 | inc_next_word: |
@@ -655,7 +731,7 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/ | |||
655 | } | 731 | } |
656 | sp_256_mont_shift_8(a, a); | 732 | sp_256_mont_shift_8(a, a); |
657 | if (word16th != 0) | 733 | if (word16th != 0) |
658 | sp_256_sub_8(a, a, m); | 734 | sp_256_sub_8_p256_mod(a, a /*, m*/); |
659 | sp_256_norm_8(a); | 735 | sp_256_norm_8(a); |
660 | } | 736 | } |
661 | } | 737 | } |
@@ -909,7 +985,7 @@ static void sp_256_map_8(sp_point* r, sp_point* p) | |||
909 | sp_256_mont_reduce_8(r->x /*, p256_mod, p256_mp_mod*/); | 985 | sp_256_mont_reduce_8(r->x /*, p256_mod, p256_mp_mod*/); |
910 | /* Reduce x to less than modulus */ | 986 | /* Reduce x to less than modulus */ |
911 | if (sp_256_cmp_8(r->x, p256_mod) >= 0) | 987 | if (sp_256_cmp_8(r->x, p256_mod) >= 0) |
912 | sp_256_sub_8(r->x, r->x, p256_mod); | 988 | sp_256_sub_8_p256_mod(r->x, r->x /*, p256_mod*/); |
913 | sp_256_norm_8(r->x); | 989 | sp_256_norm_8(r->x); |
914 | 990 | ||
915 | /* y /= z^3 */ | 991 | /* y /= z^3 */ |
@@ -918,7 +994,7 @@ static void sp_256_map_8(sp_point* r, sp_point* p) | |||
918 | sp_256_mont_reduce_8(r->y /*, p256_mod, p256_mp_mod*/); | 994 | sp_256_mont_reduce_8(r->y /*, p256_mod, p256_mp_mod*/); |
919 | /* Reduce y to less than modulus */ | 995 | /* Reduce y to less than modulus */ |
920 | if (sp_256_cmp_8(r->y, p256_mod) >= 0) | 996 | if (sp_256_cmp_8(r->y, p256_mod) >= 0) |
921 | sp_256_sub_8(r->y, r->y, p256_mod); | 997 | sp_256_sub_8_p256_mod(r->y, r->y /*, p256_mod*/); |
922 | sp_256_norm_8(r->y); | 998 | sp_256_norm_8(r->y); |
923 | 999 | ||
924 | memset(r->z, 0, sizeof(r->z)); | 1000 | memset(r->z, 0, sizeof(r->z)); |
@@ -954,17 +1030,17 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p) | |||
954 | /* Z = Y * Z */ | 1030 | /* Z = Y * Z */ |
955 | sp_256_mont_mul_8(r->z, r->y, r->z /*, p256_mod, p256_mp_mod*/); | 1031 | sp_256_mont_mul_8(r->z, r->y, r->z /*, p256_mod, p256_mp_mod*/); |
956 | /* Z = 2Z */ | 1032 | /* Z = 2Z */ |
957 | sp_256_mont_dbl_8(r->z, r->z, p256_mod); | 1033 | sp_256_mont_dbl_8(r->z, r->z /*, p256_mod*/); |
958 | /* T2 = X - T1 */ | 1034 | /* T2 = X - T1 */ |
959 | sp_256_mont_sub_8(t2, r->x, t1, p256_mod); | 1035 | sp_256_mont_sub_8(t2, r->x, t1 /*, p256_mod*/); |
960 | /* T1 = X + T1 */ | 1036 | /* T1 = X + T1 */ |
961 | sp_256_mont_add_8(t1, r->x, t1, p256_mod); | 1037 | sp_256_mont_add_8(t1, r->x, t1 /*, p256_mod*/); |
962 | /* T2 = T1 * T2 */ | 1038 | /* T2 = T1 * T2 */ |
963 | sp_256_mont_mul_8(t2, t1, t2 /*, p256_mod, p256_mp_mod*/); | 1039 | sp_256_mont_mul_8(t2, t1, t2 /*, p256_mod, p256_mp_mod*/); |
964 | /* T1 = 3T2 */ | 1040 | /* T1 = 3T2 */ |
965 | sp_256_mont_tpl_8(t1, t2, p256_mod); | 1041 | sp_256_mont_tpl_8(t1, t2 /*, p256_mod*/); |
966 | /* Y = 2Y */ | 1042 | /* Y = 2Y */ |
967 | sp_256_mont_dbl_8(r->y, r->y, p256_mod); | 1043 | sp_256_mont_dbl_8(r->y, r->y /*, p256_mod*/); |
968 | /* Y = Y * Y */ | 1044 | /* Y = Y * Y */ |
969 | sp_256_mont_sqr_8(r->y, r->y /*, p256_mod, p256_mp_mod*/); | 1045 | sp_256_mont_sqr_8(r->y, r->y /*, p256_mod, p256_mp_mod*/); |
970 | /* T2 = Y * Y */ | 1046 | /* T2 = Y * Y */ |
@@ -976,15 +1052,15 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p) | |||
976 | /* X = T1 * T1 */ | 1052 | /* X = T1 * T1 */ |
977 | sp_256_mont_mul_8(r->x, t1, t1 /*, p256_mod, p256_mp_mod*/); | 1053 | sp_256_mont_mul_8(r->x, t1, t1 /*, p256_mod, p256_mp_mod*/); |
978 | /* X = X - Y */ | 1054 | /* X = X - Y */ |
979 | sp_256_mont_sub_8(r->x, r->x, r->y, p256_mod); | 1055 | sp_256_mont_sub_8(r->x, r->x, r->y /*, p256_mod*/); |
980 | /* X = X - Y */ | 1056 | /* X = X - Y */ |
981 | sp_256_mont_sub_8(r->x, r->x, r->y, p256_mod); | 1057 | sp_256_mont_sub_8(r->x, r->x, r->y /*, p256_mod*/); |
982 | /* Y = Y - X */ | 1058 | /* Y = Y - X */ |
983 | sp_256_mont_sub_8(r->y, r->y, r->x, p256_mod); | 1059 | sp_256_mont_sub_8(r->y, r->y, r->x /*, p256_mod*/); |
984 | /* Y = Y * T1 */ | 1060 | /* Y = Y * T1 */ |
985 | sp_256_mont_mul_8(r->y, r->y, t1 /*, p256_mod, p256_mp_mod*/); | 1061 | sp_256_mont_mul_8(r->y, r->y, t1 /*, p256_mod, p256_mp_mod*/); |
986 | /* Y = Y - T2 */ | 1062 | /* Y = Y - T2 */ |
987 | sp_256_mont_sub_8(r->y, r->y, t2, p256_mod); | 1063 | sp_256_mont_sub_8(r->y, r->y, t2 /*, p256_mod*/); |
988 | dump_512("y2 %s\n", r->y); | 1064 | dump_512("y2 %s\n", r->y); |
989 | } | 1065 | } |
990 | 1066 | ||
@@ -1043,9 +1119,9 @@ static void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* q) | |||
1043 | /* S2 = Y2*Z1^3 */ | 1119 | /* S2 = Y2*Z1^3 */ |
1044 | sp_256_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/); | 1120 | sp_256_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/); |
1045 | /* H = U2 - U1 */ | 1121 | /* H = U2 - U1 */ |
1046 | sp_256_mont_sub_8(t2, t2, t1, p256_mod); | 1122 | sp_256_mont_sub_8(t2, t2, t1 /*, p256_mod*/); |
1047 | /* R = S2 - S1 */ | 1123 | /* R = S2 - S1 */ |
1048 | sp_256_mont_sub_8(t4, t4, t3, p256_mod); | 1124 | sp_256_mont_sub_8(t4, t4, t3 /*, p256_mod*/); |
1049 | /* Z3 = H*Z1*Z2 */ | 1125 | /* Z3 = H*Z1*Z2 */ |
1050 | sp_256_mont_mul_8(v->z, v->z, q->z /*, p256_mod, p256_mp_mod*/); | 1126 | sp_256_mont_mul_8(v->z, v->z, q->z /*, p256_mod, p256_mp_mod*/); |
1051 | sp_256_mont_mul_8(v->z, v->z, t2 /*, p256_mod, p256_mp_mod*/); | 1127 | sp_256_mont_mul_8(v->z, v->z, t2 /*, p256_mod, p256_mp_mod*/); |
@@ -1054,14 +1130,14 @@ static void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* q) | |||
1054 | sp_256_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/); | 1130 | sp_256_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/); |
1055 | sp_256_mont_mul_8(v->y, t1, t5 /*, p256_mod, p256_mp_mod*/); | 1131 | sp_256_mont_mul_8(v->y, t1, t5 /*, p256_mod, p256_mp_mod*/); |
1056 | sp_256_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/); | 1132 | sp_256_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/); |
1057 | sp_256_mont_sub_8(v->x, v->x, t5, p256_mod); | 1133 | sp_256_mont_sub_8(v->x, v->x, t5 /*, p256_mod*/); |
1058 | sp_256_mont_dbl_8(t1, v->y, p256_mod); | 1134 | sp_256_mont_dbl_8(t1, v->y /*, p256_mod*/); |
1059 | sp_256_mont_sub_8(v->x, v->x, t1, p256_mod); | 1135 | sp_256_mont_sub_8(v->x, v->x, t1 /*, p256_mod*/); |
1060 | /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ | 1136 | /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ |
1061 | sp_256_mont_sub_8(v->y, v->y, v->x, p256_mod); | 1137 | sp_256_mont_sub_8(v->y, v->y, v->x /*, p256_mod*/); |
1062 | sp_256_mont_mul_8(v->y, v->y, t4 /*, p256_mod, p256_mp_mod*/); | 1138 | sp_256_mont_mul_8(v->y, v->y, t4 /*, p256_mod, p256_mp_mod*/); |
1063 | sp_256_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/); | 1139 | sp_256_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/); |
1064 | sp_256_mont_sub_8(v->y, v->y, t5, p256_mod); | 1140 | sp_256_mont_sub_8(v->y, v->y, t5 /*, p256_mod*/); |
1065 | } | 1141 | } |
1066 | } | 1142 | } |
1067 | 1143 | ||