aboutsummaryrefslogtreecommitdiff
path: root/networking
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2021-10-06 20:14:49 +0200
committer Denys Vlasenko <vda.linux@googlemail.com> 2021-10-06 20:19:30 +0200
commit 5e9c6170218826dded581b99dfd225b0c76c6c86 (patch)
tree e74ba5e448720883e342c7dc19abc99a39d41a4d /networking
parent 87e3f2e9f8a1c99b223b316fbefb5ae49c2a8fe2 (diff)
download busybox-w32-5e9c6170218826dded581b99dfd225b0c76c6c86.tar.gz
busybox-w32-5e9c6170218826dded581b99dfd225b0c76c6c86.tar.bz2
busybox-w32-5e9c6170218826dded581b99dfd225b0c76c6c86.zip
tls: P256: sp_256_sub_8_p256_mod always subtracts in-place, use that
i386:
function                                             old     new   delta
sp_256_mont_reduce_8                                 245     243      -2
sp_256_mont_dbl_8                                     26      24      -2
sp_256_ecc_mulmod_8                                 1161    1157      -4
sp_256_proj_point_dbl_8                              359     353      -6
sp_256_sub_8_p256_mod                                 71      32     -39
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/5 up/down: 0/-53)             Total: -53 bytes

non-asm code:
function                                             old     new   delta
sp_256_sub_8_p256_mod                                  -      12     +12
sp_256_mont_reduce_8                                 250     243      -7
sp_256_mont_dbl_8                                     31      24      -7
sp_256_ecc_mulmod_8                                 1171    1157     -14
sp_256_proj_point_dbl_8                              374     353     -21
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/4 up/down: 12/-49)            Total: -37 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'networking')
-rw-r--r-- networking/tls_sp_c32.c 99
1 file changed, 36 insertions, 63 deletions
diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 1391cb405..b3828d817 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -346,82 +346,55 @@ static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
346#endif 346#endif
347} 347}
348 348
349/* Sub p256_mod from a into r. (r = a - p256_mod). */ 349/* Sub p256_mod from r. (r = r - p256_mod). */
350#if ALLOW_ASM && defined(__GNUC__) && defined(__i386__) 350#if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
351static void sp_256_sub_8_p256_mod(sp_digit* r, const sp_digit* a) 351static void sp_256_sub_8_p256_mod(sp_digit* r)
352{ 352{
353 sp_digit reg;
354//p256_mod[7..0] = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff 353//p256_mod[7..0] = ffffffff 00000001 00000000 00000000 00000000 ffffffff ffffffff ffffffff
355 asm volatile ( 354 asm volatile (
356"\n movl (%0), %2" 355"\n subl $0xffffffff, (%0)"
357"\n subl $0xffffffff, %2" 356"\n sbbl $0xffffffff, 1*4(%0)"
358"\n movl %2, (%1)" 357"\n sbbl $0xffffffff, 2*4(%0)"
359"\n" 358"\n sbbl $0, 3*4(%0)"
360"\n movl 1*4(%0), %2" 359"\n sbbl $0, 4*4(%0)"
361"\n sbbl $0xffffffff, %2" 360"\n sbbl $0, 5*4(%0)"
362"\n movl %2, 1*4(%1)" 361"\n sbbl $1, 6*4(%0)"
363"\n" 362"\n sbbl $0xffffffff, 7*4(%0)"
364"\n movl 2*4(%0), %2"
365"\n sbbl $0xffffffff, %2"
366"\n movl %2, 2*4(%1)"
367"\n"
368"\n movl 3*4(%0), %2"
369"\n sbbl $0, %2"
370"\n movl %2, 3*4(%1)"
371"\n"
372"\n movl 4*4(%0), %2"
373"\n sbbl $0, %2"
374"\n movl %2, 4*4(%1)"
375"\n" 363"\n"
376"\n movl 5*4(%0), %2" 364 : "=r" (r)
377"\n sbbl $0, %2" 365 : "0" (r)
378"\n movl %2, 5*4(%1)"
379"\n"
380"\n movl 6*4(%0), %2"
381"\n sbbl $1, %2"
382"\n movl %2, 6*4(%1)"
383"\n"
384"\n movl 7*4(%0), %2"
385"\n sbbl $0xffffffff, %2"
386"\n movl %2, 7*4(%1)"
387"\n"
388 : "=r" (a), "=r" (r), "=r" (reg)
389 : "0" (a), "1" (r)
390 : "memory" 366 : "memory"
391 ); 367 );
392} 368}
393#elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__) 369#elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
394static void sp_256_sub_8_p256_mod(sp_digit* r, const sp_digit* a) 370static void sp_256_sub_8_p256_mod(sp_digit* r)
395{ 371{
396 uint64_t reg; 372 uint64_t reg;
397 uint64_t ooff; 373 uint64_t ooff;
398//p256_mod[3..0] = ffffffff00000001 0000000000000000 00000000ffffffff ffffffffffffffff 374//p256_mod[3..0] = ffffffff00000001 0000000000000000 00000000ffffffff ffffffffffffffff
399 asm volatile ( 375 asm volatile (
400"\n movq (%0), %3" 376"\n addq $1, (%0)" // adding 1 is the same as subtracting ffffffffffffffff
401"\n addq $1, %3" // adding 1 is the same as subtracting ffffffffffffffff
402"\n movq %3, (%1)" //
403"\n cmc" // only carry bit needs inverting 377"\n cmc" // only carry bit needs inverting
378
379"\n sbbq %1, 1*8(%0)" // %1 holds 00000000ffffffff
380
381"\n sbbq $0, 2*8(%0)"
404"\n" 382"\n"
405"\n movq 1*8(%0), %3" 383"\n movq 3*8(%0), %2"
406"\n sbbq %2, %3" // %2 holds 00000000ffffffff 384"\n sbbq $0, %2" // adding 00000000ffffffff (in %1)
407"\n movq %3, 1*8(%1)" 385"\n addq %1, %2" // is the same as subtracting ffffffff00000001
408"\n" 386"\n movq %2, 3*8(%0)"
409"\n movq 2*8(%0), %3"
410"\n sbbq $0, %3"
411"\n movq %3, 2*8(%1)"
412"\n"
413"\n movq 3*8(%0), %3"
414"\n sbbq $0, %3" // adding 00000000ffffffff (in %2)
415"\n addq %2, %3" // is the same as subtracting ffffffff00000001
416"\n movq %3, 3*8(%1)"
417"\n" 387"\n"
418 : "=r" (a), "=r" (r), "=r" (ooff), "=r" (reg) 388 : "=r" (r), "=r" (ooff), "=r" (reg)
419 : "0" (a), "1" (r), "2" (0x00000000ffffffff) 389 : "0" (r), "1" (0x00000000ffffffff)
420 : "memory" 390 : "memory"
421 ); 391 );
422} 392}
423#else 393#else
424# define sp_256_sub_8_p256_mod(r, a) sp_256_sub_8((r), (a), p256_mod) 394static void sp_256_sub_8_p256_mod(sp_digit* r)
395{
396 sp_256_sub_8(r, r, p256_mod);
397}
425#endif 398#endif
426 399
427/* Multiply a and b into r. (r = a * b) */ 400/* Multiply a and b into r. (r = a * b) */
@@ -609,7 +582,7 @@ static void sp_256_mont_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b
609 int carry = sp_256_add_8(r, a, b); 582 int carry = sp_256_add_8(r, a, b);
610 sp_256_norm_8(r); 583 sp_256_norm_8(r);
611 if (carry) { 584 if (carry) {
612 sp_256_sub_8_p256_mod(r, r /*, m*/); 585 sp_256_sub_8_p256_mod(r);
613 sp_256_norm_8(r); 586 sp_256_norm_8(r);
614 } 587 }
615} 588}
@@ -637,7 +610,7 @@ static void sp_256_mont_dbl_8(sp_digit* r, const sp_digit* a /*, const sp_digit*
637 int carry = sp_256_add_8(r, a, a); 610 int carry = sp_256_add_8(r, a, a);
638 sp_256_norm_8(r); 611 sp_256_norm_8(r);
639 if (carry) 612 if (carry)
640 sp_256_sub_8_p256_mod(r, r /*, m*/); 613 sp_256_sub_8_p256_mod(r);
641 sp_256_norm_8(r); 614 sp_256_norm_8(r);
642} 615}
643 616
@@ -649,13 +622,13 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a /*, const sp_digit*
649 int carry = sp_256_add_8(r, a, a); 622 int carry = sp_256_add_8(r, a, a);
650 sp_256_norm_8(r); 623 sp_256_norm_8(r);
651 if (carry) { 624 if (carry) {
652 sp_256_sub_8_p256_mod(r, r /*, m*/); 625 sp_256_sub_8_p256_mod(r);
653 sp_256_norm_8(r); 626 sp_256_norm_8(r);
654 } 627 }
655 carry = sp_256_add_8(r, r, a); 628 carry = sp_256_add_8(r, r, a);
656 sp_256_norm_8(r); 629 sp_256_norm_8(r);
657 if (carry) { 630 if (carry) {
658 sp_256_sub_8_p256_mod(r, r /*, m*/); 631 sp_256_sub_8_p256_mod(r);
659 sp_256_norm_8(r); 632 sp_256_norm_8(r);
660 } 633 }
661} 634}
@@ -829,7 +802,7 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/
829 } 802 }
830 sp_256_mont_shift_8(a, a); 803 sp_256_mont_shift_8(a, a);
831 if (word16th != 0) 804 if (word16th != 0)
832 sp_256_sub_8_p256_mod(a, a /*, m*/); 805 sp_256_sub_8_p256_mod(a);
833 sp_256_norm_8(a); 806 sp_256_norm_8(a);
834 } 807 }
835 else { /* Same code for explicit mp == 1 (which is always the case for P256) */ 808 else { /* Same code for explicit mp == 1 (which is always the case for P256) */
@@ -849,7 +822,7 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/
849 } 822 }
850 sp_256_mont_shift_8(a, a); 823 sp_256_mont_shift_8(a, a);
851 if (word16th != 0) 824 if (word16th != 0)
852 sp_256_sub_8_p256_mod(a, a /*, m*/); 825 sp_256_sub_8_p256_mod(a);
853 sp_256_norm_8(a); 826 sp_256_norm_8(a);
854 } 827 }
855} 828}
@@ -1104,7 +1077,7 @@ static void sp_256_map_8(sp_point* r, sp_point* p)
1104 sp_256_mont_reduce_8(r->x /*, p256_mod, p256_mp_mod*/); 1077 sp_256_mont_reduce_8(r->x /*, p256_mod, p256_mp_mod*/);
1105 /* Reduce x to less than modulus */ 1078 /* Reduce x to less than modulus */
1106 if (sp_256_cmp_8(r->x, p256_mod) >= 0) 1079 if (sp_256_cmp_8(r->x, p256_mod) >= 0)
1107 sp_256_sub_8_p256_mod(r->x, r->x /*, p256_mod*/); 1080 sp_256_sub_8_p256_mod(r->x);
1108 sp_256_norm_8(r->x); 1081 sp_256_norm_8(r->x);
1109 1082
1110 /* y /= z^3 */ 1083 /* y /= z^3 */
@@ -1113,7 +1086,7 @@ static void sp_256_map_8(sp_point* r, sp_point* p)
1113 sp_256_mont_reduce_8(r->y /*, p256_mod, p256_mp_mod*/); 1086 sp_256_mont_reduce_8(r->y /*, p256_mod, p256_mp_mod*/);
1114 /* Reduce y to less than modulus */ 1087 /* Reduce y to less than modulus */
1115 if (sp_256_cmp_8(r->y, p256_mod) >= 0) 1088 if (sp_256_cmp_8(r->y, p256_mod) >= 0)
1116 sp_256_sub_8_p256_mod(r->y, r->y /*, p256_mod*/); 1089 sp_256_sub_8_p256_mod(r->y);
1117 sp_256_norm_8(r->y); 1090 sp_256_norm_8(r->y);
1118 1091
1119 memset(r->z, 0, sizeof(r->z)); 1092 memset(r->z, 0, sizeof(r->z));