about summary refs log tree commit diff
diff options
context:
space:
mode:
authorDenys Vlasenko <vda.linux@googlemail.com>2021-10-06 01:09:37 +0200
committerDenys Vlasenko <vda.linux@googlemail.com>2021-10-06 01:11:48 +0200
commitc78428461513afed5e3bf272bcbf17964cbd61a3 (patch)
treef8edb17e335ce89508c297e3450587c75425530d
parent2430fcfd8de47f786aca1185ae0500fa36c6a548 (diff)
downloadbusybox-w32-c78428461513afed5e3bf272bcbf17964cbd61a3.tar.gz
busybox-w32-c78428461513afed5e3bf272bcbf17964cbd61a3.tar.bz2
busybox-w32-c78428461513afed5e3bf272bcbf17964cbd61a3.zip
tls: P256: propagate constants, create dedicated "subtract p256_mod" function
8 instances of this subtraction probably warrant a few bytes more of code.

function                                             old     new   delta
sp_256_sub_8_p256_mod                                  -      71     +71
sp_256_mont_sub_8                                      -      29     +29
sp_256_mont_dbl_8                                      -      26     +26
sp_256_mont_reduce_8                                 262     257      -5
sp_256_ecc_mulmod_8                                 1171    1161     -10
sp_256_proj_point_dbl_8                              374     359     -15
static.sp_256_mont_sub_8                              29       -     -29
static.sp_256_mont_dbl_8                              31       -     -31
------------------------------------------------------------------------------
(add/remove: 3/2 grow/shrink: 0/3 up/down: 126/-90)           Total: 36 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r--  networking/tls_sp_c32.c  140
1 file changed, 108 insertions, 32 deletions
diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 0773a2d47..1ab6106a7 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -291,6 +291,74 @@ static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
291#endif 291#endif
292} 292}
293 293
294/* Sub p256_mod from a into r. (r = a - p256_mod). */
295static void sp_256_sub_8_p256_mod(sp_digit* r, const sp_digit* a)
296{
297#if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
298 sp_digit reg;
299//p256_mod[7..0] = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff
300 asm volatile (
301"\n movl (%0), %2"
302"\n subl $0xffffffff, %2"
303"\n movl %2, (%1)"
304"\n"
305"\n movl 1*4(%0), %2"
306"\n sbbl $0xffffffff, %2"
307"\n movl %2, 1*4(%1)"
308"\n"
309"\n movl 2*4(%0), %2"
310"\n sbbl $0xffffffff, %2"
311"\n movl %2, 2*4(%1)"
312"\n"
313"\n movl 3*4(%0), %2"
314"\n sbbl $0, %2"
315"\n movl %2, 3*4(%1)"
316"\n"
317"\n movl 4*4(%0), %2"
318"\n sbbl $0, %2"
319"\n movl %2, 4*4(%1)"
320"\n"
321"\n movl 5*4(%0), %2"
322"\n sbbl $0, %2"
323"\n movl %2, 5*4(%1)"
324"\n"
325"\n movl 6*4(%0), %2"
326"\n sbbl $1, %2"
327"\n movl %2, 6*4(%1)"
328"\n"
329"\n movl 7*4(%0), %2"
330"\n sbbl $0xffffffff, %2"
331"\n movl %2, 7*4(%1)"
332"\n"
333 : "=r" (a), "=r" (r), "=r" (reg)
334 : "0" (a), "1" (r)
335 : "memory"
336 );
337#else
338 const sp_digit* b = p256_mod;
339 int i;
340 sp_digit borrow;
341
342 borrow = 0;
343 for (i = 0; i < 8; i++) {
344 sp_digit w, v;
345 w = b[i] + borrow;
346 v = a[i];
347 if (w != 0) {
348 v = a[i] - w;
349 borrow = (v > a[i]);
350 /* hope compiler detects above as "carry flag set" */
351 }
352 /* else: b + borrow == 0, two cases:
353 * b:ffffffff, borrow:1
354 * b:00000000, borrow:0
355 * in either case, r[i] = a[i] and borrow remains unchanged
356 */
357 r[i] = v;
358 }
359#endif
360}
361
294/* Multiply a and b into r. (r = a * b) */ 362/* Multiply a and b into r. (r = a * b) */
295static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) 363static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
296{ 364{
@@ -425,21 +493,25 @@ static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
425} 493}
426 494
427/* Add two Montgomery form numbers (r = a + b % m) */ 495/* Add two Montgomery form numbers (r = a + b % m) */
428static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b, 496static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b
429 const sp_digit* m) 497 /*, const sp_digit* m*/)
430{ 498{
499// const sp_digit* m = p256_mod;
500
431 int carry = sp_256_add_8(r, a, b); 501 int carry = sp_256_add_8(r, a, b);
432 sp_256_norm_8(r); 502 sp_256_norm_8(r);
433 if (carry) { 503 if (carry) {
434 sp_256_sub_8(r, r, m); 504 sp_256_sub_8_p256_mod(r, r /*, m*/);
435 sp_256_norm_8(r); 505 sp_256_norm_8(r);
436 } 506 }
437} 507}
438 508
439/* Subtract two Montgomery form numbers (r = a - b % m) */ 509/* Subtract two Montgomery form numbers (r = a - b % m) */
440static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b, 510static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b
441 const sp_digit* m) 511 /*, const sp_digit* m*/)
442{ 512{
513 const sp_digit* m = p256_mod;
514
443 int borrow; 515 int borrow;
444 borrow = sp_256_sub_8(r, a, b); 516 borrow = sp_256_sub_8(r, a, b);
445 sp_256_norm_8(r); 517 sp_256_norm_8(r);
@@ -450,28 +522,32 @@ static void sp_256_mont_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b,
450} 522}
451 523
452/* Double a Montgomery form number (r = a + a % m) */ 524/* Double a Montgomery form number (r = a + a % m) */
453static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) 525static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a /*, const sp_digit* m*/)
454{ 526{
527// const sp_digit* m = p256_mod;
528
455 int carry = sp_256_add_8(r, a, a); 529 int carry = sp_256_add_8(r, a, a);
456 sp_256_norm_8(r); 530 sp_256_norm_8(r);
457 if (carry) 531 if (carry)
458 sp_256_sub_8(r, r, m); 532 sp_256_sub_8_p256_mod(r, r /*, m*/);
459 sp_256_norm_8(r); 533 sp_256_norm_8(r);
460} 534}
461 535
462/* Triple a Montgomery form number (r = a + a + a % m) */ 536/* Triple a Montgomery form number (r = a + a + a % m) */
463static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a, const sp_digit* m) 537static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a /*, const sp_digit* m*/)
464{ 538{
539// const sp_digit* m = p256_mod;
540
465 int carry = sp_256_add_8(r, a, a); 541 int carry = sp_256_add_8(r, a, a);
466 sp_256_norm_8(r); 542 sp_256_norm_8(r);
467 if (carry) { 543 if (carry) {
468 sp_256_sub_8(r, r, m); 544 sp_256_sub_8_p256_mod(r, r /*, m*/);
469 sp_256_norm_8(r); 545 sp_256_norm_8(r);
470 } 546 }
471 carry = sp_256_add_8(r, r, a); 547 carry = sp_256_add_8(r, r, a);
472 sp_256_norm_8(r); 548 sp_256_norm_8(r);
473 if (carry) { 549 if (carry) {
474 sp_256_sub_8(r, r, m); 550 sp_256_sub_8_p256_mod(r, r /*, m*/);
475 sp_256_norm_8(r); 551 sp_256_norm_8(r);
476 } 552 }
477} 553}
@@ -612,7 +688,7 @@ static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
612 */ 688 */
613static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/) 689static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/)
614{ 690{
615 const sp_digit* m = p256_mod; 691// const sp_digit* m = p256_mod;
616 sp_digit mp = p256_mp_mod; 692 sp_digit mp = p256_mp_mod;
617 693
618 int i; 694 int i;
@@ -635,13 +711,13 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/
635 } 711 }
636 sp_256_mont_shift_8(a, a); 712 sp_256_mont_shift_8(a, a);
637 if (word16th != 0) 713 if (word16th != 0)
638 sp_256_sub_8(a, a, m); 714 sp_256_sub_8_p256_mod(a, a /*, m*/);
639 sp_256_norm_8(a); 715 sp_256_norm_8(a);
640 } 716 }
641 else { /* Same code for explicit mp == 1 (which is always the case for P256) */ 717 else { /* Same code for explicit mp == 1 (which is always the case for P256) */
642 sp_digit word16th = 0; 718 sp_digit word16th = 0;
643 for (i = 0; i < 8; i++) { 719 for (i = 0; i < 8; i++) {
644// mu = a[i]; 720 /*mu = a[i];*/
645 if (sp_256_mul_add_8(a+i /*, m, mu*/)) { 721 if (sp_256_mul_add_8(a+i /*, m, mu*/)) {
646 int j = i + 8; 722 int j = i + 8;
647 inc_next_word: 723 inc_next_word:
@@ -655,7 +731,7 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/
655 } 731 }
656 sp_256_mont_shift_8(a, a); 732 sp_256_mont_shift_8(a, a);
657 if (word16th != 0) 733 if (word16th != 0)
658 sp_256_sub_8(a, a, m); 734 sp_256_sub_8_p256_mod(a, a /*, m*/);
659 sp_256_norm_8(a); 735 sp_256_norm_8(a);
660 } 736 }
661} 737}
@@ -909,7 +985,7 @@ static void sp_256_map_8(sp_point* r, sp_point* p)
909 sp_256_mont_reduce_8(r->x /*, p256_mod, p256_mp_mod*/); 985 sp_256_mont_reduce_8(r->x /*, p256_mod, p256_mp_mod*/);
910 /* Reduce x to less than modulus */ 986 /* Reduce x to less than modulus */
911 if (sp_256_cmp_8(r->x, p256_mod) >= 0) 987 if (sp_256_cmp_8(r->x, p256_mod) >= 0)
912 sp_256_sub_8(r->x, r->x, p256_mod); 988 sp_256_sub_8_p256_mod(r->x, r->x /*, p256_mod*/);
913 sp_256_norm_8(r->x); 989 sp_256_norm_8(r->x);
914 990
915 /* y /= z^3 */ 991 /* y /= z^3 */
@@ -918,7 +994,7 @@ static void sp_256_map_8(sp_point* r, sp_point* p)
918 sp_256_mont_reduce_8(r->y /*, p256_mod, p256_mp_mod*/); 994 sp_256_mont_reduce_8(r->y /*, p256_mod, p256_mp_mod*/);
919 /* Reduce y to less than modulus */ 995 /* Reduce y to less than modulus */
920 if (sp_256_cmp_8(r->y, p256_mod) >= 0) 996 if (sp_256_cmp_8(r->y, p256_mod) >= 0)
921 sp_256_sub_8(r->y, r->y, p256_mod); 997 sp_256_sub_8_p256_mod(r->y, r->y /*, p256_mod*/);
922 sp_256_norm_8(r->y); 998 sp_256_norm_8(r->y);
923 999
924 memset(r->z, 0, sizeof(r->z)); 1000 memset(r->z, 0, sizeof(r->z));
@@ -954,17 +1030,17 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
954 /* Z = Y * Z */ 1030 /* Z = Y * Z */
955 sp_256_mont_mul_8(r->z, r->y, r->z /*, p256_mod, p256_mp_mod*/); 1031 sp_256_mont_mul_8(r->z, r->y, r->z /*, p256_mod, p256_mp_mod*/);
956 /* Z = 2Z */ 1032 /* Z = 2Z */
957 sp_256_mont_dbl_8(r->z, r->z, p256_mod); 1033 sp_256_mont_dbl_8(r->z, r->z /*, p256_mod*/);
958 /* T2 = X - T1 */ 1034 /* T2 = X - T1 */
959 sp_256_mont_sub_8(t2, r->x, t1, p256_mod); 1035 sp_256_mont_sub_8(t2, r->x, t1 /*, p256_mod*/);
960 /* T1 = X + T1 */ 1036 /* T1 = X + T1 */
961 sp_256_mont_add_8(t1, r->x, t1, p256_mod); 1037 sp_256_mont_add_8(t1, r->x, t1 /*, p256_mod*/);
962 /* T2 = T1 * T2 */ 1038 /* T2 = T1 * T2 */
963 sp_256_mont_mul_8(t2, t1, t2 /*, p256_mod, p256_mp_mod*/); 1039 sp_256_mont_mul_8(t2, t1, t2 /*, p256_mod, p256_mp_mod*/);
964 /* T1 = 3T2 */ 1040 /* T1 = 3T2 */
965 sp_256_mont_tpl_8(t1, t2, p256_mod); 1041 sp_256_mont_tpl_8(t1, t2 /*, p256_mod*/);
966 /* Y = 2Y */ 1042 /* Y = 2Y */
967 sp_256_mont_dbl_8(r->y, r->y, p256_mod); 1043 sp_256_mont_dbl_8(r->y, r->y /*, p256_mod*/);
968 /* Y = Y * Y */ 1044 /* Y = Y * Y */
969 sp_256_mont_sqr_8(r->y, r->y /*, p256_mod, p256_mp_mod*/); 1045 sp_256_mont_sqr_8(r->y, r->y /*, p256_mod, p256_mp_mod*/);
970 /* T2 = Y * Y */ 1046 /* T2 = Y * Y */
@@ -976,15 +1052,15 @@ static void sp_256_proj_point_dbl_8(sp_point* r, sp_point* p)
976 /* X = T1 * T1 */ 1052 /* X = T1 * T1 */
977 sp_256_mont_mul_8(r->x, t1, t1 /*, p256_mod, p256_mp_mod*/); 1053 sp_256_mont_mul_8(r->x, t1, t1 /*, p256_mod, p256_mp_mod*/);
978 /* X = X - Y */ 1054 /* X = X - Y */
979 sp_256_mont_sub_8(r->x, r->x, r->y, p256_mod); 1055 sp_256_mont_sub_8(r->x, r->x, r->y /*, p256_mod*/);
980 /* X = X - Y */ 1056 /* X = X - Y */
981 sp_256_mont_sub_8(r->x, r->x, r->y, p256_mod); 1057 sp_256_mont_sub_8(r->x, r->x, r->y /*, p256_mod*/);
982 /* Y = Y - X */ 1058 /* Y = Y - X */
983 sp_256_mont_sub_8(r->y, r->y, r->x, p256_mod); 1059 sp_256_mont_sub_8(r->y, r->y, r->x /*, p256_mod*/);
984 /* Y = Y * T1 */ 1060 /* Y = Y * T1 */
985 sp_256_mont_mul_8(r->y, r->y, t1 /*, p256_mod, p256_mp_mod*/); 1061 sp_256_mont_mul_8(r->y, r->y, t1 /*, p256_mod, p256_mp_mod*/);
986 /* Y = Y - T2 */ 1062 /* Y = Y - T2 */
987 sp_256_mont_sub_8(r->y, r->y, t2, p256_mod); 1063 sp_256_mont_sub_8(r->y, r->y, t2 /*, p256_mod*/);
988 dump_512("y2 %s\n", r->y); 1064 dump_512("y2 %s\n", r->y);
989} 1065}
990 1066
@@ -1043,9 +1119,9 @@ static void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* q)
1043 /* S2 = Y2*Z1^3 */ 1119 /* S2 = Y2*Z1^3 */
1044 sp_256_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/); 1120 sp_256_mont_mul_8(t4, t4, q->y /*, p256_mod, p256_mp_mod*/);
1045 /* H = U2 - U1 */ 1121 /* H = U2 - U1 */
1046 sp_256_mont_sub_8(t2, t2, t1, p256_mod); 1122 sp_256_mont_sub_8(t2, t2, t1 /*, p256_mod*/);
1047 /* R = S2 - S1 */ 1123 /* R = S2 - S1 */
1048 sp_256_mont_sub_8(t4, t4, t3, p256_mod); 1124 sp_256_mont_sub_8(t4, t4, t3 /*, p256_mod*/);
1049 /* Z3 = H*Z1*Z2 */ 1125 /* Z3 = H*Z1*Z2 */
1050 sp_256_mont_mul_8(v->z, v->z, q->z /*, p256_mod, p256_mp_mod*/); 1126 sp_256_mont_mul_8(v->z, v->z, q->z /*, p256_mod, p256_mp_mod*/);
1051 sp_256_mont_mul_8(v->z, v->z, t2 /*, p256_mod, p256_mp_mod*/); 1127 sp_256_mont_mul_8(v->z, v->z, t2 /*, p256_mod, p256_mp_mod*/);
@@ -1054,14 +1130,14 @@ static void sp_256_proj_point_add_8(sp_point* r, sp_point* p, sp_point* q)
1054 sp_256_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/); 1130 sp_256_mont_sqr_8(t5, t2 /*, p256_mod, p256_mp_mod*/);
1055 sp_256_mont_mul_8(v->y, t1, t5 /*, p256_mod, p256_mp_mod*/); 1131 sp_256_mont_mul_8(v->y, t1, t5 /*, p256_mod, p256_mp_mod*/);
1056 sp_256_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/); 1132 sp_256_mont_mul_8(t5, t5, t2 /*, p256_mod, p256_mp_mod*/);
1057 sp_256_mont_sub_8(v->x, v->x, t5, p256_mod); 1133 sp_256_mont_sub_8(v->x, v->x, t5 /*, p256_mod*/);
1058 sp_256_mont_dbl_8(t1, v->y, p256_mod); 1134 sp_256_mont_dbl_8(t1, v->y /*, p256_mod*/);
1059 sp_256_mont_sub_8(v->x, v->x, t1, p256_mod); 1135 sp_256_mont_sub_8(v->x, v->x, t1 /*, p256_mod*/);
1060 /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */ 1136 /* Y3 = R*(U1*H^2 - X3) - S1*H^3 */
1061 sp_256_mont_sub_8(v->y, v->y, v->x, p256_mod); 1137 sp_256_mont_sub_8(v->y, v->y, v->x /*, p256_mod*/);
1062 sp_256_mont_mul_8(v->y, v->y, t4 /*, p256_mod, p256_mp_mod*/); 1138 sp_256_mont_mul_8(v->y, v->y, t4 /*, p256_mod, p256_mp_mod*/);
1063 sp_256_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/); 1139 sp_256_mont_mul_8(t5, t5, t3 /*, p256_mod, p256_mp_mod*/);
1064 sp_256_mont_sub_8(v->y, v->y, t5, p256_mod); 1140 sp_256_mont_sub_8(v->y, v->y, t5 /*, p256_mod*/);
1065 } 1141 }
1066} 1142}
1067 1143