diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2021-12-30 18:54:02 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2021-12-30 18:54:02 +0100 |
commit | 0b62a08777e29c34f947c791a1eded5b97e05699 (patch) | |
tree | c411bf4bd5f5d2dd6821287696b5866f595134fe | |
parent | 25aadc893d21b35f7d34a9d1edc843632e7abd8f (diff) | |
download | busybox-w32-0b62a08777e29c34f947c791a1eded5b97e05699.tar.gz busybox-w32-0b62a08777e29c34f947c791a1eded5b97e05699.tar.bz2 busybox-w32-0b62a08777e29c34f947c791a1eded5b97e05699.zip |
libbb/sha1: add config-selectable partially unrolled version
function old new delta
sha1_process_block64 364 732 +368
static.rconsts 16 - -16
------------------------------------------------------------------------------
(add/remove: 0/1 grow/shrink: 1/0 up/down: 368/-16) Total: 352 bytes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- | libbb/Config.src | 3 | ||||
-rw-r--r-- | libbb/hash_md5_sha.c | 100 |
2 files changed, 98 insertions, 5 deletions
diff --git a/libbb/Config.src b/libbb/Config.src index 13188ef03..c793f5939 100644 --- a/libbb/Config.src +++ b/libbb/Config.src | |||
@@ -60,7 +60,8 @@ config SHA1_SMALL | |||
60 | throughput MB/s size of sha1_process_block64 | 60 | throughput MB/s size of sha1_process_block64 |
61 | value 486 x86-64 486 x86-64 | 61 | value 486 x86-64 486 x86-64 |
62 | 0 339 374 4149 4167 | 62 | 0 339 374 4149 4167 |
63 | 1,2,3 200 195 358 380 | 63 | 1 224 229 654 732 |
64 | 2,3 200 195 358 380 | ||
64 | 65 | ||
65 | config SHA3_SMALL | 66 | config SHA3_SMALL |
66 | int "SHA3: Trade bytes for speed (0:fast, 1:slow)" | 67 | int "SHA3: Trade bytes for speed (0:fast, 1:slow)" |
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index 75673e334..053ebe291 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c | |||
@@ -514,9 +514,9 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx) | |||
514 | do { \ | 514 | do { \ |
515 | uint32_t work = EXPR(B, C, D); \ | 515 | uint32_t work = EXPR(B, C, D); \ |
516 | if (n <= 15) \ | 516 | if (n <= 15) \ |
517 | work += W[n & 0xf] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[n]); \ | 517 | work += W[n & 15] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[n]); \ |
518 | if (n >= 16) \ | 518 | if (n >= 16) \ |
519 | work += W[n & 0xf] = rotl32(W[(n+13) & 0xf] ^ W[(n+8) & 0xf] ^ W[(n+2) & 0xf] ^ W[n & 0xf], 1); \ | 519 | work += W[n & 15] = rotl32(W[(n+13) & 15] ^ W[(n+8) & 15] ^ W[(n+2) & 15] ^ W[n & 15], 1); \ |
520 | E += work + rotl32(A, 5) + rconsts[n / 20]; \ | 520 | E += work + rotl32(A, 5) + rconsts[n / 20]; \ |
521 | B = rotl32(B, 30); \ | 521 | B = rotl32(B, 30); \ |
522 | } while (0) | 522 | } while (0) |
@@ -549,9 +549,101 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx) | |||
549 | ctx->hash[3] += d; | 549 | ctx->hash[3] += d; |
550 | ctx->hash[4] += e; | 550 | ctx->hash[4] += e; |
551 | } | 551 | } |
552 | #else | 552 | #elif CONFIG_SHA1_SMALL == 1 |
553 | /* TODO: for CONFIG_SHA1_SMALL == 1, have a partially unrolled version? */ | 553 | /* Middle-sized version, +300 bytes of code on x86. */ |
554 | static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx) | ||
555 | { /* Runs 80 steps (16 + 4 + 20 + 20 + 20) over the 16 words read from ctx->wbuffer, then adds the resulting a..e into ctx->hash[0..4]. */ | ||
556 | static const uint32_t rconsts[] ALIGN4 = { /* one additive constant per group of 20 steps */ | ||
557 | 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6 | ||
558 | }; | ||
559 | int j; /* countdown for the 20-step loops of rounds 2..4 */ | ||
560 | int n; /* index into W[]; "& 15" keeps it in 0..15 */ | ||
561 | uint32_t W[16+16]; /* 16-word window stored twice: every store below writes both W[n] and W[n+16] */ | ||
562 | uint32_t a, b, c, d, e; /* working copies of the five hash words */ | ||
563 | |||
564 | a = ctx->hash[0]; | ||
565 | b = ctx->hash[1]; | ||
566 | c = ctx->hash[2]; | ||
567 | d = ctx->hash[3]; | ||
568 | e = ctx->hash[4]; | ||
569 | |||
570 | /* 1st round of 20 operations: 16 steps fed from the input block, then 4 steps fed from the recurrence */ | ||
571 | n = 0; | ||
572 | do { | ||
573 | uint32_t work = ((c ^ d) & b) ^ d; /* per bit: c where b is set, d where b is clear */ | ||
574 | W[n] = W[n+16] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[n]); /* input word n, passed through SWAP_BE32, stored in both copies */ | ||
575 | work += W[n]; | ||
576 | work += e + rotl32(a, 5) + rconsts[0]; | ||
577 | /* Rotate by one for next time: shift the five working variables, the new value becomes a */ | ||
578 | e = d; | ||
579 | d = c; | ||
580 | c = rotl32(b, 30); | ||
581 | b = a; | ||
582 | a = work; | ||
583 | n = (n + 1) & 15; | ||
584 | } while (n != 0); /* n wrapped 15 -> 0: all 16 input words consumed */ | ||
585 | do { /* remaining 4 steps of round 1: same mixing function, words now come from the recurrence */ | ||
586 | uint32_t work = ((c ^ d) & b) ^ d; | ||
587 | W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1); /* n <= 15, so n+13 <= 28 stays inside W[32]; the duplicate copy replaces index masking */ | ||
588 | work += W[n]; | ||
589 | work += e + rotl32(a, 5) + rconsts[0]; | ||
590 | e = d; | ||
591 | d = c; | ||
592 | c = rotl32(b, 30); | ||
593 | b = a; | ||
594 | a = work; | ||
595 | n = (n + 1) & 15; | ||
596 | } while (n != 4); /* 4 iterations (n = 0..3); n == 4 carries over into the next round */ | ||
597 | /* 2nd round of 20 operations: xor of b, c, d with rconsts[1] */ | ||
598 | j = 19; /* loop below runs for j = 19 down to 0, i.e. 20 times */ | ||
599 | do { | ||
600 | uint32_t work = c ^ d ^ b; | ||
601 | W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1); | ||
602 | work += W[n]; | ||
603 | work += e + rotl32(a, 5) + rconsts[1]; | ||
604 | e = d; | ||
605 | d = c; | ||
606 | c = rotl32(b, 30); | ||
607 | b = a; | ||
608 | a = work; | ||
609 | n = (n + 1) & 15; | ||
610 | } while (--j >= 0); | ||
611 | /* 3rd round of 20 operations: bitwise majority of b, c, d with rconsts[2] */ | ||
612 | j = 19; | ||
613 | do { | ||
614 | uint32_t work = ((b | c) & d) | (b & c); /* a bit is set when at least two of b, c, d have it set */ | ||
615 | W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1); | ||
616 | work += W[n]; | ||
617 | work += e + rotl32(a, 5) + rconsts[2]; | ||
618 | e = d; | ||
619 | d = c; | ||
620 | c = rotl32(b, 30); | ||
621 | b = a; | ||
622 | a = work; | ||
623 | n = (n + 1) & 15; | ||
624 | } while (--j >= 0); | ||
625 | /* 4th round of 20 operations: same xor function as the 2nd round, with rconsts[3] */ | ||
626 | j = 19; | ||
627 | do { | ||
628 | uint32_t work = c ^ d ^ b; | ||
629 | W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1); | ||
630 | work += W[n]; | ||
631 | work += e + rotl32(a, 5) + rconsts[3]; | ||
632 | e = d; | ||
633 | d = c; | ||
634 | c = rotl32(b, 30); | ||
635 | b = a; | ||
636 | a = work; | ||
637 | n = (n + 1) & 15; | ||
638 | } while (--j >= 0); | ||
554 | 639 | ||
640 | ctx->hash[0] += a; /* add the working variables back into the running hash */ | ||
641 | ctx->hash[1] += b; | ||
642 | ctx->hash[2] += c; | ||
643 | ctx->hash[3] += d; | ||
644 | ctx->hash[4] += e; | ||
645 | } | ||
646 | #else | ||
555 | /* Compact version, almost twice as slow as fully unrolled */ | 647 | /* Compact version, almost twice as slow as fully unrolled */ |
556 | static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx) | 648 | static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx) |
557 | { | 649 | { |