author		Denys Vlasenko <vda.linux@googlemail.com>	2021-11-27 11:28:11 +0100
committer	Denys Vlasenko <vda.linux@googlemail.com>	2021-11-27 11:28:11 +0100
commit		4bc9da10718df7ed9e992b1ddd2e80d53d894177 (patch)
tree		3225ca484904b0f3d85a40cbbad02636b27f7aa7
parent		15f7d618ea7f8c3a0277c98309268b709e20d77c (diff)
download	busybox-w32-4bc9da10718df7ed9e992b1ddd2e80d53d894177.tar.gz
		busybox-w32-4bc9da10718df7ed9e992b1ddd2e80d53d894177.tar.bz2
		busybox-w32-4bc9da10718df7ed9e992b1ddd2e80d53d894177.zip
tls: P256: 64-bit optimizations
function                                             old     new   delta
sp_256_proj_point_dbl_8                              421     428      +7
sp_256_point_from_bin2x32                             78      84      +6
sp_256_cmp_8                                          38      42      +4
sp_256_to_bin_8                                       28      31      +3
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 4/0 up/down: 20/0)             Total: 20 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r--	include/platform.h	  2
-rw-r--r--	networking/tls_sp_c32.c	114
2 files changed, 101 insertions, 15 deletions
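The idea behind the change: on a 64-bit little-endian target that tolerates unaligned accesses (x86_64 is the case the patch enables), the eight 32-bit limbs of a P-256 value and four 64-bit limbs are the same bytes in memory, so conversion to and from the big-endian wire format can be done in 8-byte chunks. The stand-alone sketch below illustrates that layout trick; it uses memcpy and GCC's __builtin_bswap64 as stand-ins for BusyBox's move_from_unaligned64 and SWAP_BE64 macros, and is an illustration, not part of the patch.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stand-ins for BusyBox's macros: memcpy is a portable unaligned load, and
 * __builtin_bswap64 (GCC/clang) plays the role of SWAP_BE64 on a
 * little-endian host.  Both substitutions are assumptions of this sketch. */
static uint64_t load_be64(const uint8_t *p)
{
	uint64_t v;
	memcpy(&v, p, 8);            /* unaligned 64-bit load */
	return __builtin_bswap64(v); /* big-endian bytes -> host value */
}

/* Read a 32-byte big-endian number into eight 32-bit limbs (least significant
 * limb first), doing the work as four 64-bit limbs - the same layout trick
 * the patched sp_256_from_bin_8 relies on. */
static void from_bin_256(uint32_t limbs32[8], const uint8_t bin[32])
{
	/* Valid only where uint32[8] and uint64[4] share a layout (little-endian)
	 * and unaligned access is tolerated, i.e. the patch's UNALIGNED_LE_64BIT case. */
	uint64_t *limbs64 = (void*)limbs32;
	int i;
	for (i = 0; i < 4; i++)
		limbs64[3 - i] = load_be64(bin + 8*i); /* most significant bytes -> top limb */
}

int main(void)
{
	uint8_t bin[32];
	uint32_t limbs[8];
	int i;
	for (i = 0; i < 32; i++)
		bin[i] = i; /* the big-endian value 0x000102...1e1f */
	from_bin_256(limbs, bin);
	/* Lowest limb holds the last four input bytes, highest limb the first four */
	printf("limb[0] = 0x%08x, limb[7] = 0x%08x\n", limbs[0], limbs[7]);
	return 0;
}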
diff --git a/include/platform.h b/include/platform.h
index 9e1fb047d..ad27bb31a 100644
--- a/include/platform.h
+++ b/include/platform.h
@@ -239,6 +239,7 @@ typedef uint64_t bb__aliased_uint64_t FIX_ALIASING;
 # define move_from_unaligned_long(v, longp) ((v) = *(bb__aliased_long*)(longp))
 # define move_from_unaligned16(v, u16p) ((v) = *(bb__aliased_uint16_t*)(u16p))
 # define move_from_unaligned32(v, u32p) ((v) = *(bb__aliased_uint32_t*)(u32p))
+# define move_from_unaligned64(v, u64p) ((v) = *(bb__aliased_uint64_t*)(u64p))
 # define move_to_unaligned16(u16p, v) (*(bb__aliased_uint16_t*)(u16p) = (v))
 # define move_to_unaligned32(u32p, v) (*(bb__aliased_uint32_t*)(u32p) = (v))
 # define move_to_unaligned64(u64p, v) (*(bb__aliased_uint64_t*)(u64p) = (v))
@@ -250,6 +251,7 @@ typedef uint64_t bb__aliased_uint64_t FIX_ALIASING;
 # define move_from_unaligned_long(v, longp) (memcpy(&(v), (longp), sizeof(long)))
 # define move_from_unaligned16(v, u16p) (memcpy(&(v), (u16p), 2))
 # define move_from_unaligned32(v, u32p) (memcpy(&(v), (u32p), 4))
+# define move_from_unaligned64(v, u64p) (memcpy(&(v), (u64p), 8))
 # define move_to_unaligned16(u16p, v) do { \
 	uint16_t __t = (v); \
 	memcpy((u16p), &__t, 2); \
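The platform.h hunks above only add the 64-bit sibling of the existing move_from_unaligned16/32 macros, in both the type-punning and the memcpy flavours. A minimal stand-alone use of the memcpy flavour follows (the macro body is copied from the hunk; the test program around it is illustrative only):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* The memcpy-based fallback added by the hunk above: a 64-bit load that is
 * legal at any address, aligned or not. */
#define move_from_unaligned64(v, u64p) (memcpy(&(v), (u64p), 8))

int main(void)
{
	/* buf + 1 is deliberately misaligned */
	uint8_t buf[9] = { 0, 0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11 };
	uint64_t v;

	move_from_unaligned64(v, buf + 1);
	/* On a little-endian host this prints 0x1122334455667788 */
	printf("0x%016llx\n", (unsigned long long)v);
	return 0;
}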
diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 4d4ecdd74..d09f7e881 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -29,6 +29,20 @@ static void dump_hex(const char *fmt, const void *vp, int len)
 typedef uint32_t sp_digit;
 typedef int32_t signed_sp_digit;
 
+/* 64-bit optimizations:
+ * if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff,
+ * then loads and stores can be done in 64-bit chunks.
+ *
+ * A narrower case is when arch is also little-endian (such as x86_64),
+ * then "LSW first", uint32[8] and uint64[4] representations are equivalent,
+ * and arithmetic can be done in 64 bits too.
+ */
+#if defined(__GNUC__) && defined(__x86_64__)
+# define UNALIGNED_LE_64BIT 1
+#else
+# define UNALIGNED_LE_64BIT 0
+#endif
+
 /* The code below is taken from parts of
  * wolfssl-3.15.3/wolfcrypt/src/sp_c32.c
  * and heavily modified.
@@ -58,6 +72,22 @@ static const sp_digit p256_mod[8] = {
  * r A single precision integer.
  * a Byte array.
  */
+#if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff
+static void sp_256_to_bin_8(const sp_digit* rr, uint8_t* a)
+{
+	int i;
+	const uint64_t* r = (void*)rr;
+
+	sp_256_norm_8(rr);
+
+	r += 4;
+	for (i = 0; i < 4; i++) {
+		r--;
+		move_to_unaligned64(a, SWAP_BE64(*r));
+		a += 8;
+	}
+}
+#else
 static void sp_256_to_bin_8(const sp_digit* r, uint8_t* a)
 {
 	int i;
@@ -71,6 +101,7 @@ static void sp_256_to_bin_8(const sp_digit* r, uint8_t* a)
 		a += 4;
 	}
 }
+#endif
 
 /* Read big endian unsigned byte array into r.
  *
@@ -78,6 +109,21 @@ static void sp_256_to_bin_8(const sp_digit* r, uint8_t* a)
  * a Byte array.
  * n Number of bytes in array to read.
  */
+#if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff
+static void sp_256_from_bin_8(sp_digit* rr, const uint8_t* a)
+{
+	int i;
+	uint64_t* r = (void*)rr;
+
+	r += 4;
+	for (i = 0; i < 4; i++) {
+		uint64_t v;
+		move_from_unaligned64(v, a);
+		*--r = SWAP_BE64(v);
+		a += 8;
+	}
+}
+#else
 static void sp_256_from_bin_8(sp_digit* r, const uint8_t* a)
 {
 	int i;
@@ -90,6 +136,7 @@ static void sp_256_from_bin_8(sp_digit* r, const uint8_t* a)
 		a += 4;
 	}
 }
+#endif
 
 #if SP_DEBUG
 static void dump_256(const char *fmt, const sp_digit* r)
@@ -125,6 +172,20 @@ static void sp_256_point_from_bin2x32(sp_point* p, const uint8_t *bin2x32)
  * return -ve, 0 or +ve if a is less than, equal to or greater than b
  * respectively.
  */
+#if UNALIGNED_LE_64BIT
+static signed_sp_digit sp_256_cmp_8(const sp_digit* aa, const sp_digit* bb)
+{
+	const uint64_t* a = (void*)aa;
+	const uint64_t* b = (void*)bb;
+	int i;
+	for (i = 3; i >= 0; i--) {
+		if (a[i] == b[i])
+			continue;
+		return (a[i] > b[i]) * 2 - 1;
+	}
+	return 0;
+}
+#else
 static signed_sp_digit sp_256_cmp_8(const sp_digit* a, const sp_digit* b)
 {
 	int i;
@@ -140,6 +201,7 @@ static signed_sp_digit sp_256_cmp_8(const sp_digit* a, const sp_digit* b)
 	}
 	return 0;
 }
+#endif
 
 /* Compare two numbers to determine if they are equal.
  *
@@ -196,8 +258,6 @@ static int sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 	);
 	return reg;
 #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
-	/* x86_64 has no alignment restrictions, and is little-endian,
-	 * so 64-bit and 32-bit representations are identical */
 	uint64_t reg;
 	asm volatile (
 "\n		movq	(%0), %3"
@@ -294,8 +354,6 @@ static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 	);
 	return reg;
 #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
-	/* x86_64 has no alignment restrictions, and is little-endian,
-	 * so 64-bit and 32-bit representations are identical */
 	uint64_t reg;
 	asm volatile (
 "\n		movq	(%0), %3"
@@ -440,8 +498,6 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 	r[15] = accl;
 	memcpy(r, rr, sizeof(rr));
 #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
-	/* x86_64 has no alignment restrictions, and is little-endian,
-	 * so 64-bit and 32-bit representations are identical */
 	const uint64_t* aa = (const void*)a;
 	const uint64_t* bb = (const void*)b;
 	uint64_t rr[8];
@@ -551,17 +607,32 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 }
 
 /* Shift number right one bit. Bottom bit is lost. */
-static void sp_256_rshift1_8(sp_digit* r, sp_digit* a, sp_digit carry)
+#if UNALIGNED_LE_64BIT
+static void sp_256_rshift1_8(sp_digit* rr, uint64_t carry)
+{
+	uint64_t *r = (void*)rr;
+	int i;
+
+	carry = (((uint64_t)!!carry) << 63);
+	for (i = 3; i >= 0; i--) {
+		uint64_t c = r[i] << 63;
+		r[i] = (r[i] >> 1) | carry;
+		carry = c;
+	}
+}
+#else
+static void sp_256_rshift1_8(sp_digit* r, sp_digit carry)
 {
 	int i;
 
-	carry = (!!carry << 31);
+	carry = (((sp_digit)!!carry) << 31);
 	for (i = 7; i >= 0; i--) {
-		sp_digit c = a[i] << 31;
-		r[i] = (a[i] >> 1) | carry;
+		sp_digit c = r[i] << 31;
+		r[i] = (r[i] >> 1) | carry;
 		carry = c;
 	}
 }
+#endif
@@ -570,7 +641,7 @@ static void sp_256_div2_8(sp_digit* r, const sp_digit* a, const sp_digit* m)
 	if (a[0] & 1)
 		carry = sp_256_add_8(r, a, m);
 	sp_256_norm_8(r);
-	sp_256_rshift1_8(r, r, carry);
+	sp_256_rshift1_8(r, carry);
 }
 
 /* Add two Montgomery form numbers (r = a + b % m) */
@@ -634,15 +705,28 @@ static void sp_256_mont_tpl_8(sp_digit* r, const sp_digit* a /*, const sp_digit*
 }
 
 /* Shift the result in the high 256 bits down to the bottom. */
-static void sp_256_mont_shift_8(sp_digit* r, const sp_digit* a)
+#if BB_UNALIGNED_MEMACCESS_OK && ULONG_MAX > 0xffffffff
+static void sp_256_mont_shift_8(sp_digit* rr)
+{
+	uint64_t *r = (void*)rr;
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		r[i] = r[i+4];
+		r[i+4] = 0;
+	}
+}
+#else
+static void sp_256_mont_shift_8(sp_digit* r)
 {
 	int i;
 
 	for (i = 0; i < 8; i++) {
-		r[i] = a[i+8];
+		r[i] = r[i+8];
 		r[i+8] = 0;
 	}
 }
+#endif
 
 /* Mul a by scalar b and add into r. (r += a * b) */
 static int sp_256_mul_add_8(sp_digit* r /*, const sp_digit* a, sp_digit b*/)
@@ -800,7 +884,7 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/
 			goto inc_next_word0;
 		}
 	}
-	sp_256_mont_shift_8(a, a);
+	sp_256_mont_shift_8(a);
 	if (word16th != 0)
 		sp_256_sub_8_p256_mod(a);
 	sp_256_norm_8(a);
@@ -820,7 +904,7 @@ static void sp_256_mont_reduce_8(sp_digit* a/*, const sp_digit* m, sp_digit mp*/
 			goto inc_next_word;
 		}
 	}
-	sp_256_mont_shift_8(a, a);
+	sp_256_mont_shift_8(a);
 	if (word16th != 0)
 		sp_256_sub_8_p256_mod(a);
 	sp_256_norm_8(a);
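A small detail worth noting in the new 64-bit sp_256_cmp_8 above: the result is computed as (a[i] > b[i]) * 2 - 1, which maps the comparison of the first differing limb to +1 or -1 without a branch. A tiny stand-alone check of that expression (illustrative only, not BusyBox code):

#include <stdint.h>
#include <stdio.h>

/* (x > y) evaluates to 1 or 0 in C, so (x > y) * 2 - 1 is +1 when x > y and
 * -1 otherwise.  sp_256_cmp_8 only reaches this expression for limbs that are
 * known to differ, so "otherwise" there really means "less than". */
static int cmp_limb(uint64_t x, uint64_t y)
{
	return (x > y) * 2 - 1;
}

int main(void)
{
	printf("%d %d\n", cmp_limb(5, 3), cmp_limb(3, 5)); /* prints "1 -1" */
	return 0;
}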