aboutsummaryrefslogtreecommitdiff
path: root/networking
diff options
context:
space:
mode:
author Ron Yorston <rmy@pobox.com> 2024-07-13 08:29:09 +0100
committer Ron Yorston <rmy@pobox.com> 2024-07-13 08:29:09 +0100
commit b18891bba511d4fc4fcd0a6ff5cd2df31a086f1b (patch)
tree ef78f9ecc339d6ab95eed03f787f058f270b8772 /networking
parent 684dabdb8452b3d33d5d6265f3d7ef32c10f5307 (diff)
parent 23da5c4b716b92524240c6f81c2e2474c1825cfc (diff)
download busybox-w32-b18891bba511d4fc4fcd0a6ff5cd2df31a086f1b.tar.gz
busybox-w32-b18891bba511d4fc4fcd0a6ff5cd2df31a086f1b.tar.bz2
busybox-w32-b18891bba511d4fc4fcd0a6ff5cd2df31a086f1b.zip
Merge branch 'busybox' into merge
Diffstat (limited to 'networking')
-rw-r--r-- networking/tls_sp_c32.c 91
1 files changed, 62 insertions, 29 deletions
diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 999033034..e493c436a 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -411,10 +411,10 @@ static void sp_256_sub_8_p256_mod(sp_digit* r)
411"\n subl $0xffffffff, (%0)" 411"\n subl $0xffffffff, (%0)"
412"\n sbbl $0xffffffff, 1*4(%0)" 412"\n sbbl $0xffffffff, 1*4(%0)"
413"\n sbbl $0xffffffff, 2*4(%0)" 413"\n sbbl $0xffffffff, 2*4(%0)"
414"\n sbbl $0, 3*4(%0)" 414"\n sbbl $0x00000000, 3*4(%0)"
415"\n sbbl $0, 4*4(%0)" 415"\n sbbl $0x00000000, 4*4(%0)"
416"\n sbbl $0, 5*4(%0)" 416"\n sbbl $0x00000000, 5*4(%0)"
417"\n sbbl $1, 6*4(%0)" 417"\n sbbl $0x00000001, 6*4(%0)"
418"\n sbbl $0xffffffff, 7*4(%0)" 418"\n sbbl $0xffffffff, 7*4(%0)"
419"\n" 419"\n"
420 : "=r" (r) 420 : "=r" (r)
@@ -422,29 +422,48 @@ static void sp_256_sub_8_p256_mod(sp_digit* r)
422 : "memory" 422 : "memory"
423 ); 423 );
424} 424}
425#elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__) && ENABLE_PLATFORM_POSIX 425#elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
426static void sp_256_sub_8_p256_mod(sp_digit* r) 426static void sp_256_sub_8_p256_mod(sp_digit* r)
427{ 427{
428//p256_mod[3..0] = ffffffff00000001 0000000000000000 00000000ffffffff ffffffffffffffff
429# if 0
430 // gcc -Oz bug (?) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115875
431 // uses buggy "push $-1; pop %rax" insns to load 00000000ffffffff
428 uint64_t reg; 432 uint64_t reg;
429 uint64_t ooff; 433 uint64_t ooff;
430//p256_mod[3..0] = ffffffff00000001 0000000000000000 00000000ffffffff ffffffffffffffff
431 asm volatile ( 434 asm volatile (
432"\n addq $1, (%0)" // adding 1 is the same as subtracting ffffffffffffffff 435"\n subq $0xffffffffffffffff, (%0)"
433"\n cmc" // only carry bit needs inverting 436"\n sbbq %1, 1*8(%0)" // %1 = 00000000ffffffff
434"\n" 437"\n sbbq $0x0000000000000000, 2*8(%0)"
435"\n sbbq %1, 1*8(%0)" // %1 holds 00000000ffffffff
436"\n"
437"\n sbbq $0, 2*8(%0)"
438"\n"
439"\n movq 3*8(%0), %2" 438"\n movq 3*8(%0), %2"
440"\n sbbq $0, %2" // adding 00000000ffffffff (in %1) 439"\n sbbq $0x0, %2" // subtract carry
441"\n addq %1, %2" // is the same as subtracting ffffffff00000001 440"\n addq %1, %2" // adding 00000000ffffffff (in %1)
441"\n" // is the same as subtracting ffffffff00000001
442"\n movq %2, 3*8(%0)" 442"\n movq %2, 3*8(%0)"
443"\n" 443"\n"
444 : "=r" (r), "=r" (ooff), "=r" (reg) 444 : "=r" (r), "=r" (ooff), "=r" (reg)
445 : "0" (r), "1" (0x00000000ffffffff) 445 : "0" (r), "1" (0x00000000ffffffffUL) /* UL is important! */
446 : "memory"
447 );
448# else // let's do it by hand:
449 uint64_t reg;
450 uint64_t rax;
451 asm volatile (
452"\n orl $0xffffffff, %%eax" // %1 (rax) = 00000000ffffffff
453"\n subq $0xffffffffffffffff, (%0)"
454"\n sbbq %1, 1*8(%0)"
455"\n sbbq $0x0000000000000000, 2*8(%0)"
456"\n movq 3*8(%0), %2"
457"\n sbbq $0x0, %2" // subtract carry
458"\n addq %1, %2" // adding 00000000ffffffff (in %1)
459"\n" // is the same as subtracting ffffffff00000001
460"\n movq %2, 3*8(%0)"
461"\n"
462 : "=r" (r), "=&a" (rax), "=r" (reg)
463 : "0" (r)
446 : "memory" 464 : "memory"
447 ); 465 );
466# endif
448} 467}
449#else 468#else
450static void sp_256_sub_8_p256_mod(sp_digit* r) 469static void sp_256_sub_8_p256_mod(sp_digit* r)
@@ -476,15 +495,23 @@ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
476//////////////////////// 495////////////////////////
477// uint64_t m = ((uint64_t)a[i]) * b[j]; 496// uint64_t m = ((uint64_t)a[i]) * b[j];
478// acc_hi:acch:accl += m; 497// acc_hi:acch:accl += m;
498 long eax_clobbered;
479 asm volatile ( 499 asm volatile (
480 // a[i] is already loaded in %%eax 500 // a[i] is already loaded in %%eax
481"\n mull %7" 501"\n mull %8"
482"\n addl %%eax, %0" 502"\n addl %%eax, %0"
483"\n adcl %%edx, %1" 503"\n adcl %%edx, %1"
484"\n adcl $0, %2" 504"\n adcl $0x0, %2"
485 : "=rm" (accl), "=rm" (acch), "=rm" (acc_hi) 505 : "=rm" (accl), "=rm" (acch), "=rm" (acc_hi), "=a" (eax_clobbered)
486 : "0" (accl), "1" (acch), "2" (acc_hi), "a" (a[i]), "m" (b[j]) 506 : "0" (accl), "1" (acch), "2" (acc_hi), "3" (a[i]), "m" (b[j])
487 : "cc", "dx" 507 : "cc", "dx"
508// What is "eax_clobbered"? gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html:
509// "Do not modify the contents of input-only operands (except for inputs tied
510// to outputs). The compiler assumes that on exit from the asm statement these
511// operands contain the same values as they had before executing the statement.
512// It is not possible to use clobbers to inform the compiler that the values
513// in these inputs are changing. One common work-around is to tie the changing
514// input variable to an output variable that never gets used."
488 ); 515 );
489//////////////////////// 516////////////////////////
490 j--; 517 j--;
@@ -500,15 +527,20 @@ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
500 const uint64_t* bb = (const void*)b; 527 const uint64_t* bb = (const void*)b;
501 uint64_t* rr = (void*)r; 528 uint64_t* rr = (void*)r;
502 int k; 529 int k;
503 uint64_t accl; 530 register uint64_t accl asm("r8");
504 uint64_t acch; 531 register uint64_t acch asm("r9");
532 /* ^^^ ask gcc to not use rax/rdx/input arg regs for accumulator variables */
533 /* (or else it may generate lots of silly mov's and even xchg's!) */
505 534
506 acch = accl = 0; 535 acch = accl = 0;
507 for (k = 0; k < 7; k++) { 536 for (k = 0; k < 7; k++) {
508 int i, j; 537 unsigned i, j;
509 uint64_t acc_hi; 538 /* ^^^^^ not signed "int",
539 * or gcc can use a temp register to sign-extend i,j for aa[i],bb[j] */
540 register uint64_t acc_hi asm("r10");
541 /* ^^^ ask gcc to not use rax/rdx/input arg regs for accumulators */
510 i = k - 3; 542 i = k - 3;
511 if (i < 0) 543 if ((int)i < 0)
512 i = 0; 544 i = 0;
513 j = k - i; 545 j = k - i;
514 acc_hi = 0; 546 acc_hi = 0;
@@ -516,14 +548,15 @@ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
516//////////////////////// 548////////////////////////
517// uint128_t m = ((uint128_t)a[i]) * b[j]; 549// uint128_t m = ((uint128_t)a[i]) * b[j];
518// acc_hi:acch:accl += m; 550// acc_hi:acch:accl += m;
551 long rax_clobbered;
519 asm volatile ( 552 asm volatile (
520 // aa[i] is already loaded in %%rax 553 // aa[i] is already loaded in %%rax
521"\n mulq %7" 554"\n mulq %8"
522"\n addq %%rax, %0" 555"\n addq %%rax, %0"
523"\n adcq %%rdx, %1" 556"\n adcq %%rdx, %1"
524"\n adcq $0, %2" 557"\n adcq $0x0, %2"
525 : "=rm" (accl), "=rm" (acch), "=rm" (acc_hi) 558 : "=rm" (accl), "=rm" (acch), "=rm" (acc_hi), "=a" (rax_clobbered)
526 : "0" (accl), "1" (acch), "2" (acc_hi), "a" (aa[i]), "m" (bb[j]) 559 : "0" (accl), "1" (acch), "2" (acc_hi), "3" (aa[i]), "m" (bb[j])
527 : "cc", "dx" 560 : "cc", "dx"
528 ); 561 );
529//////////////////////// 562////////////////////////