diff options
author | Ron Yorston <rmy@pobox.com> | 2024-07-13 08:29:09 +0100 |
---|---|---|
committer | Ron Yorston <rmy@pobox.com> | 2024-07-13 08:29:09 +0100 |
commit | b18891bba511d4fc4fcd0a6ff5cd2df31a086f1b (patch) | |
tree | ef78f9ecc339d6ab95eed03f787f058f270b8772 /networking | |
parent | 684dabdb8452b3d33d5d6265f3d7ef32c10f5307 (diff) | |
parent | 23da5c4b716b92524240c6f81c2e2474c1825cfc (diff) | |
download | busybox-w32-b18891bba511d4fc4fcd0a6ff5cd2df31a086f1b.tar.gz busybox-w32-b18891bba511d4fc4fcd0a6ff5cd2df31a086f1b.tar.bz2 busybox-w32-b18891bba511d4fc4fcd0a6ff5cd2df31a086f1b.zip |
Merge branch 'busybox' into merge
Diffstat (limited to 'networking')
-rw-r--r-- | networking/tls_sp_c32.c | 91 |
1 file changed, 62 insertions, 29 deletions
diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c index 999033034..e493c436a 100644 --- a/networking/tls_sp_c32.c +++ b/networking/tls_sp_c32.c | |||
@@ -411,10 +411,10 @@ static void sp_256_sub_8_p256_mod(sp_digit* r) | |||
411 | "\n subl $0xffffffff, (%0)" | 411 | "\n subl $0xffffffff, (%0)" |
412 | "\n sbbl $0xffffffff, 1*4(%0)" | 412 | "\n sbbl $0xffffffff, 1*4(%0)" |
413 | "\n sbbl $0xffffffff, 2*4(%0)" | 413 | "\n sbbl $0xffffffff, 2*4(%0)" |
414 | "\n sbbl $0, 3*4(%0)" | 414 | "\n sbbl $0x00000000, 3*4(%0)" |
415 | "\n sbbl $0, 4*4(%0)" | 415 | "\n sbbl $0x00000000, 4*4(%0)" |
416 | "\n sbbl $0, 5*4(%0)" | 416 | "\n sbbl $0x00000000, 5*4(%0)" |
417 | "\n sbbl $1, 6*4(%0)" | 417 | "\n sbbl $0x00000001, 6*4(%0)" |
418 | "\n sbbl $0xffffffff, 7*4(%0)" | 418 | "\n sbbl $0xffffffff, 7*4(%0)" |
419 | "\n" | 419 | "\n" |
420 | : "=r" (r) | 420 | : "=r" (r) |
@@ -422,29 +422,48 @@ static void sp_256_sub_8_p256_mod(sp_digit* r) | |||
422 | : "memory" | 422 | : "memory" |
423 | ); | 423 | ); |
424 | } | 424 | } |
425 | #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__) && ENABLE_PLATFORM_POSIX | 425 | #elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__) |
426 | static void sp_256_sub_8_p256_mod(sp_digit* r) | 426 | static void sp_256_sub_8_p256_mod(sp_digit* r) |
427 | { | 427 | { |
428 | //p256_mod[3..0] = ffffffff00000001 0000000000000000 00000000ffffffff ffffffffffffffff | ||
429 | # if 0 | ||
430 | // gcc -Oz bug (?) https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115875 | ||
431 | // uses buggy "push $-1; pop %rax" insns to load 00000000ffffffff | ||
428 | uint64_t reg; | 432 | uint64_t reg; |
429 | uint64_t ooff; | 433 | uint64_t ooff; |
430 | //p256_mod[3..0] = ffffffff00000001 0000000000000000 00000000ffffffff ffffffffffffffff | ||
431 | asm volatile ( | 434 | asm volatile ( |
432 | "\n addq $1, (%0)" // adding 1 is the same as subtracting ffffffffffffffff | 435 | "\n subq $0xffffffffffffffff, (%0)" |
433 | "\n cmc" // only carry bit needs inverting | 436 | "\n sbbq %1, 1*8(%0)" // %1 = 00000000ffffffff |
434 | "\n" | 437 | "\n sbbq $0x0000000000000000, 2*8(%0)" |
435 | "\n sbbq %1, 1*8(%0)" // %1 holds 00000000ffffffff | ||
436 | "\n" | ||
437 | "\n sbbq $0, 2*8(%0)" | ||
438 | "\n" | ||
439 | "\n movq 3*8(%0), %2" | 438 | "\n movq 3*8(%0), %2" |
440 | "\n sbbq $0, %2" // adding 00000000ffffffff (in %1) | 439 | "\n sbbq $0x0, %2" // subtract carry |
441 | "\n addq %1, %2" // is the same as subtracting ffffffff00000001 | 440 | "\n addq %1, %2" // adding 00000000ffffffff (in %1) |
441 | "\n" // is the same as subtracting ffffffff00000001 | ||
442 | "\n movq %2, 3*8(%0)" | 442 | "\n movq %2, 3*8(%0)" |
443 | "\n" | 443 | "\n" |
444 | : "=r" (r), "=r" (ooff), "=r" (reg) | 444 | : "=r" (r), "=r" (ooff), "=r" (reg) |
445 | : "0" (r), "1" (0x00000000ffffffff) | 445 | : "0" (r), "1" (0x00000000ffffffffUL) /* UL is important! */ |
446 | : "memory" | ||
447 | ); | ||
448 | # else // let's do it by hand: | ||
449 | uint64_t reg; | ||
450 | uint64_t rax; | ||
451 | asm volatile ( | ||
452 | "\n orl $0xffffffff, %%eax" // %1 (rax) = 00000000ffffffff | ||
453 | "\n subq $0xffffffffffffffff, (%0)" | ||
454 | "\n sbbq %1, 1*8(%0)" | ||
455 | "\n sbbq $0x0000000000000000, 2*8(%0)" | ||
456 | "\n movq 3*8(%0), %2" | ||
457 | "\n sbbq $0x0, %2" // subtract carry | ||
458 | "\n addq %1, %2" // adding 00000000ffffffff (in %1) | ||
459 | "\n" // is the same as subtracting ffffffff00000001 | ||
460 | "\n movq %2, 3*8(%0)" | ||
461 | "\n" | ||
462 | : "=r" (r), "=&a" (rax), "=r" (reg) | ||
463 | : "0" (r) | ||
446 | : "memory" | 464 | : "memory" |
447 | ); | 465 | ); |
466 | # endif | ||
448 | } | 467 | } |
449 | #else | 468 | #else |
450 | static void sp_256_sub_8_p256_mod(sp_digit* r) | 469 | static void sp_256_sub_8_p256_mod(sp_digit* r) |
@@ -476,15 +495,23 @@ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) | |||
476 | //////////////////////// | 495 | //////////////////////// |
477 | // uint64_t m = ((uint64_t)a[i]) * b[j]; | 496 | // uint64_t m = ((uint64_t)a[i]) * b[j]; |
478 | // acc_hi:acch:accl += m; | 497 | // acc_hi:acch:accl += m; |
498 | long eax_clobbered; | ||
479 | asm volatile ( | 499 | asm volatile ( |
480 | // a[i] is already loaded in %%eax | 500 | // a[i] is already loaded in %%eax |
481 | "\n mull %7" | 501 | "\n mull %8" |
482 | "\n addl %%eax, %0" | 502 | "\n addl %%eax, %0" |
483 | "\n adcl %%edx, %1" | 503 | "\n adcl %%edx, %1" |
484 | "\n adcl $0, %2" | 504 | "\n adcl $0x0, %2" |
485 | : "=rm" (accl), "=rm" (acch), "=rm" (acc_hi) | 505 | : "=rm" (accl), "=rm" (acch), "=rm" (acc_hi), "=a" (eax_clobbered) |
486 | : "0" (accl), "1" (acch), "2" (acc_hi), "a" (a[i]), "m" (b[j]) | 506 | : "0" (accl), "1" (acch), "2" (acc_hi), "3" (a[i]), "m" (b[j]) |
487 | : "cc", "dx" | 507 | : "cc", "dx" |
508 | // What is "eax_clobbered"? gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html: | ||
509 | // "Do not modify the contents of input-only operands (except for inputs tied | ||
510 | // to outputs). The compiler assumes that on exit from the asm statement these | ||
511 | // operands contain the same values as they had before executing the statement. | ||
512 | // It is not possible to use clobbers to inform the compiler that the values | ||
513 | // in these inputs are changing. One common work-around is to tie the changing | ||
514 | // input variable to an output variable that never gets used." | ||
488 | ); | 515 | ); |
489 | //////////////////////// | 516 | //////////////////////// |
490 | j--; | 517 | j--; |
@@ -500,15 +527,20 @@ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) | |||
500 | const uint64_t* bb = (const void*)b; | 527 | const uint64_t* bb = (const void*)b; |
501 | uint64_t* rr = (void*)r; | 528 | uint64_t* rr = (void*)r; |
502 | int k; | 529 | int k; |
503 | uint64_t accl; | 530 | register uint64_t accl asm("r8"); |
504 | uint64_t acch; | 531 | register uint64_t acch asm("r9"); |
532 | /* ^^^ ask gcc to not use rax/rdx/input arg regs for accumulator variables */ | ||
533 | /* (or else it may generate lots of silly mov's and even xchg's!) */ | ||
505 | 534 | ||
506 | acch = accl = 0; | 535 | acch = accl = 0; |
507 | for (k = 0; k < 7; k++) { | 536 | for (k = 0; k < 7; k++) { |
508 | int i, j; | 537 | unsigned i, j; |
509 | uint64_t acc_hi; | 538 | /* ^^^^^ not signed "int", |
539 | * or gcc can use a temp register to sign-extend i,j for aa[i],bb[j] */ | ||
540 | register uint64_t acc_hi asm("r10"); | ||
541 | /* ^^^ ask gcc to not use rax/rdx/input arg regs for accumulators */ | ||
510 | i = k - 3; | 542 | i = k - 3; |
511 | if (i < 0) | 543 | if ((int)i < 0) |
512 | i = 0; | 544 | i = 0; |
513 | j = k - i; | 545 | j = k - i; |
514 | acc_hi = 0; | 546 | acc_hi = 0; |
@@ -516,14 +548,15 @@ static void sp_256to512_mul_8(sp_digit* r, const sp_digit* a, const sp_digit* b) | |||
516 | //////////////////////// | 548 | //////////////////////// |
517 | // uint128_t m = ((uint128_t)a[i]) * b[j]; | 549 | // uint128_t m = ((uint128_t)a[i]) * b[j]; |
518 | // acc_hi:acch:accl += m; | 550 | // acc_hi:acch:accl += m; |
551 | long rax_clobbered; | ||
519 | asm volatile ( | 552 | asm volatile ( |
520 | // aa[i] is already loaded in %%rax | 553 | // aa[i] is already loaded in %%rax |
521 | "\n mulq %7" | 554 | "\n mulq %8" |
522 | "\n addq %%rax, %0" | 555 | "\n addq %%rax, %0" |
523 | "\n adcq %%rdx, %1" | 556 | "\n adcq %%rdx, %1" |
524 | "\n adcq $0, %2" | 557 | "\n adcq $0x0, %2" |
525 | : "=rm" (accl), "=rm" (acch), "=rm" (acc_hi) | 558 | : "=rm" (accl), "=rm" (acch), "=rm" (acc_hi), "=a" (rax_clobbered) |
526 | : "0" (accl), "1" (acch), "2" (acc_hi), "a" (aa[i]), "m" (bb[j]) | 559 | : "0" (accl), "1" (acch), "2" (acc_hi), "3" (aa[i]), "m" (bb[j]) |
527 | : "cc", "dx" | 560 | : "cc", "dx" |
528 | ); | 561 | ); |
529 | //////////////////////// | 562 | //////////////////////// |