aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2021-10-06 17:17:34 +0200
committer Denys Vlasenko <vda.linux@googlemail.com> 2021-10-06 17:17:34 +0200
commit 911344a99889319a7dba8a725a64dc324597f9eb (patch)
tree 7273dd7914feb9b87a1b9e692a0dc432bb31ae96
parent 22fd8fd3f4c271d6037753165131c7c35a039762 (diff)
download busybox-w32-911344a99889319a7dba8a725a64dc324597f9eb.tar.gz
busybox-w32-911344a99889319a7dba8a725a64dc324597f9eb.tar.bz2
busybox-w32-911344a99889319a7dba8a725a64dc324597f9eb.zip
tls: P256: x86-64 assembly
function                                             old     new   delta
sp_256_mont_mul_8                                    127     155     +28
sp_256_proj_point_dbl_8                              448     469     +21
sp_256_mont_sub_8                                     23      35     +12
sp_256_mont_dbl_8                                     26      38     +12
sp_256_sub_8                                          44      49      +5
sp_256_ecc_mulmod_8                                 1530    1535      +5
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 6/0 up/down: 83/0)               Total: 83 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- networking/tls_sp_c32.c 99
1 file changed, 99 insertions, 0 deletions
diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 532047739..14a7c7066 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -195,6 +195,34 @@ static int sp_256_add_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
195 : "memory" 195 : "memory"
196 ); 196 );
197 return reg; 197 return reg;
198#elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
199 /* x86_64 has no alignment restrictions, and is little-endian,
200 * so 64-bit and 32-bit representations are identical */
201 uint64_t reg;
202 asm volatile (
203"\n movq (%0), %3"
204"\n addq (%1), %3"
205"\n movq %3, (%2)"
206"\n"
207"\n movq 1*8(%0), %3"
208"\n adcq 1*8(%1), %3"
209"\n movq %3, 1*8(%2)"
210"\n"
211"\n movq 2*8(%0), %3"
212"\n adcq 2*8(%1), %3"
213"\n movq %3, 2*8(%2)"
214"\n"
215"\n movq 3*8(%0), %3"
216"\n adcq 3*8(%1), %3"
217"\n movq %3, 3*8(%2)"
218"\n"
219"\n sbbq %3, %3"
220"\n"
221 : "=r" (a), "=r" (b), "=r" (r), "=r" (reg)
222 : "0" (a), "1" (b), "2" (r)
223 : "memory"
224 );
225 return reg;
198#else 226#else
199 int i; 227 int i;
200 sp_digit carry; 228 sp_digit carry;
@@ -265,6 +293,34 @@ static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
265 : "memory" 293 : "memory"
266 ); 294 );
267 return reg; 295 return reg;
296#elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
297 /* x86_64 has no alignment restrictions, and is little-endian,
298 * so 64-bit and 32-bit representations are identical */
299 uint64_t reg;
300 asm volatile (
301"\n movq (%0), %3"
302"\n subq (%1), %3"
303"\n movq %3, (%2)"
304"\n"
305"\n movq 1*8(%0), %3"
306"\n sbbq 1*8(%1), %3"
307"\n movq %3, 1*8(%2)"
308"\n"
309"\n movq 2*8(%0), %3"
310"\n sbbq 2*8(%1), %3"
311"\n movq %3, 2*8(%2)"
312"\n"
313"\n movq 3*8(%0), %3"
314"\n sbbq 3*8(%1), %3"
315"\n movq %3, 3*8(%2)"
316"\n"
317"\n sbbq %3, %3"
318"\n"
319 : "=r" (a), "=r" (b), "=r" (r), "=r" (reg)
320 : "0" (a), "1" (b), "2" (r)
321 : "memory"
322 );
323 return reg;
268#else 324#else
269 int i; 325 int i;
270 sp_digit borrow; 326 sp_digit borrow;
@@ -380,6 +436,49 @@ static void sp_256_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
380 } 436 }
381 r[15] = accl; 437 r[15] = accl;
382 memcpy(r, rr, sizeof(rr)); 438 memcpy(r, rr, sizeof(rr));
439#elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
440 /* x86_64 has no alignment restrictions, and is little-endian,
441 * so 64-bit and 32-bit representations are identical */
442 const uint64_t* aa = (const void*)a;
443 const uint64_t* bb = (const void*)b;
444 uint64_t rr[8];
445 int k;
446 uint64_t accl;
447 uint64_t acch;
448
449 acch = accl = 0;
450 for (k = 0; k < 7; k++) {
451 int i, j;
452 uint64_t acc_hi;
453 i = k - 3;
454 if (i < 0)
455 i = 0;
456 j = k - i;
457 acc_hi = 0;
458 do {
459////////////////////////
460// uint128_t m = ((uint128_t)a[i]) * b[j];
461// acc_hi:acch:accl += m;
462 asm volatile (
463 // aa[i] is already loaded in %%rax
464"\n mulq %7"
465"\n addq %%rax, %0"
466"\n adcq %%rdx, %1"
467"\n adcq $0, %2"
468 : "=rm" (accl), "=rm" (acch), "=rm" (acc_hi)
469 : "0" (accl), "1" (acch), "2" (acc_hi), "a" (aa[i]), "m" (bb[j])
470 : "cc", "dx"
471 );
472////////////////////////
473 j--;
474 i++;
475 } while (i != 4 && i <= k);
476 rr[k] = accl;
477 accl = acch;
478 acch = acc_hi;
479 }
480 rr[7] = accl;
481 memcpy(r, rr, sizeof(rr));
383#elif 0 482#elif 0
384 //TODO: arm assembly (untested) 483 //TODO: arm assembly (untested)
385 sp_digit tmp[16]; 484 sp_digit tmp[16];