aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Denys Vlasenko <vda.linux@googlemail.com> 2021-12-30 18:54:02 +0100
committer: Denys Vlasenko <vda.linux@googlemail.com> 2021-12-30 18:54:02 +0100
commit: 0b62a08777e29c34f947c791a1eded5b97e05699 (patch)
tree: c411bf4bd5f5d2dd6821287696b5866f595134fe
parent: 25aadc893d21b35f7d34a9d1edc843632e7abd8f (diff)
download: busybox-w32-0b62a08777e29c34f947c791a1eded5b97e05699.tar.gz
          busybox-w32-0b62a08777e29c34f947c791a1eded5b97e05699.tar.bz2
          busybox-w32-0b62a08777e29c34f947c791a1eded5b97e05699.zip
libbb/sha1: add config-selectable partially unrolled version
function                                             old     new   delta
sha1_process_block64                                 364     732    +368
static.rconsts                                        16       -     -16
------------------------------------------------------------------------------
(add/remove: 0/1 grow/shrink: 1/0 up/down: 368/-16)           Total: 352 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r--  libbb/Config.src        3
-rw-r--r--  libbb/hash_md5_sha.c  100
2 files changed, 98 insertions, 5 deletions
diff --git a/libbb/Config.src b/libbb/Config.src
index 13188ef03..c793f5939 100644
--- a/libbb/Config.src
+++ b/libbb/Config.src
@@ -60,7 +60,8 @@ config SHA1_SMALL
60 throughput MB/s size of sha1_process_block64 60 throughput MB/s size of sha1_process_block64
61 value 486 x86-64 486 x86-64 61 value 486 x86-64 486 x86-64
62 0 339 374 4149 4167 62 0 339 374 4149 4167
63 1,2,3 200 195 358 380 63 1 224 229 654 732
64 2,3 200 195 358 380
64 65
65config SHA3_SMALL 66config SHA3_SMALL
66 int "SHA3: Trade bytes for speed (0:fast, 1:slow)" 67 int "SHA3: Trade bytes for speed (0:fast, 1:slow)"
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c
index 75673e334..053ebe291 100644
--- a/libbb/hash_md5_sha.c
+++ b/libbb/hash_md5_sha.c
@@ -514,9 +514,9 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
514 do { \ 514 do { \
515 uint32_t work = EXPR(B, C, D); \ 515 uint32_t work = EXPR(B, C, D); \
516 if (n <= 15) \ 516 if (n <= 15) \
517 work += W[n & 0xf] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[n]); \ 517 work += W[n & 15] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[n]); \
518 if (n >= 16) \ 518 if (n >= 16) \
519 work += W[n & 0xf] = rotl32(W[(n+13) & 0xf] ^ W[(n+8) & 0xf] ^ W[(n+2) & 0xf] ^ W[n & 0xf], 1); \ 519 work += W[n & 15] = rotl32(W[(n+13) & 15] ^ W[(n+8) & 15] ^ W[(n+2) & 15] ^ W[n & 15], 1); \
520 E += work + rotl32(A, 5) + rconsts[n / 20]; \ 520 E += work + rotl32(A, 5) + rconsts[n / 20]; \
521 B = rotl32(B, 30); \ 521 B = rotl32(B, 30); \
522 } while (0) 522 } while (0)
@@ -549,9 +549,101 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
549 ctx->hash[3] += d; 549 ctx->hash[3] += d;
550 ctx->hash[4] += e; 550 ctx->hash[4] += e;
551} 551}
552#else 552#elif CONFIG_SHA1_SMALL == 1
553/* TODO: for CONFIG_SHA1_SMALL == 1, have a partially unrolled version? */ 553/* Middle-sized version, +300 bytes of code on x86. */
554static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
555{
556 static const uint32_t rconsts[] ALIGN4 = {
557 0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
558 };
559 int j;
560 int n;
561 uint32_t W[16+16];
562 uint32_t a, b, c, d, e;
563
564 a = ctx->hash[0];
565 b = ctx->hash[1];
566 c = ctx->hash[2];
567 d = ctx->hash[3];
568 e = ctx->hash[4];
569
570 /* 1st round of 20 operations */
571 n = 0;
572 do {
573 uint32_t work = ((c ^ d) & b) ^ d;
574 W[n] = W[n+16] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[n]);
575 work += W[n];
576 work += e + rotl32(a, 5) + rconsts[0];
577 /* Rotate by one for next time */
578 e = d;
579 d = c;
580 c = rotl32(b, 30);
581 b = a;
582 a = work;
583 n = (n + 1) & 15;
584 } while (n != 0);
585 do {
586 uint32_t work = ((c ^ d) & b) ^ d;
587 W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
588 work += W[n];
589 work += e + rotl32(a, 5) + rconsts[0];
590 e = d;
591 d = c;
592 c = rotl32(b, 30);
593 b = a;
594 a = work;
595 n = (n + 1) & 15;
596 } while (n != 4);
597 /* 2nd round of 20 operations */
598 j = 19;
599 do {
600 uint32_t work = c ^ d ^ b;
601 W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
602 work += W[n];
603 work += e + rotl32(a, 5) + rconsts[1];
604 e = d;
605 d = c;
606 c = rotl32(b, 30);
607 b = a;
608 a = work;
609 n = (n + 1) & 15;
610 } while (--j >= 0);
611 /* 3rd round */
612 j = 19;
613 do {
614 uint32_t work = ((b | c) & d) | (b & c);
615 W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
616 work += W[n];
617 work += e + rotl32(a, 5) + rconsts[2];
618 e = d;
619 d = c;
620 c = rotl32(b, 30);
621 b = a;
622 a = work;
623 n = (n + 1) & 15;
624 } while (--j >= 0);
625 /* 4th round */
626 j = 19;
627 do {
628 uint32_t work = c ^ d ^ b;
629 W[n] = W[n+16] = rotl32(W[n+13] ^ W[n+8] ^ W[n+2] ^ W[n], 1);
630 work += W[n];
631 work += e + rotl32(a, 5) + rconsts[3];
632 e = d;
633 d = c;
634 c = rotl32(b, 30);
635 b = a;
636 a = work;
637 n = (n + 1) & 15;
638 } while (--j >= 0);
554 639
640 ctx->hash[0] += a;
641 ctx->hash[1] += b;
642 ctx->hash[2] += c;
643 ctx->hash[3] += d;
644 ctx->hash[4] += e;
645}
646#else
555/* Compact version, almost twice as slow as fully unrolled */ 647/* Compact version, almost twice as slow as fully unrolled */
556static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx) 648static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
557{ 649{