diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2022-01-01 15:42:15 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2022-01-01 15:42:15 +0100 |
commit | 4d4f1f2096f06d69a6f205f0d8e33d4398f25677 (patch) | |
tree | f3ab167117ee36b55d98ddd6cc49eb087de64b0b /libbb | |
parent | d643010feeef312c77d7f51c3dd476d4e605c982 (diff) | |
download | busybox-w32-4d4f1f2096f06d69a6f205f0d8e33d4398f25677.tar.gz busybox-w32-4d4f1f2096f06d69a6f205f0d8e33d4398f25677.tar.bz2 busybox-w32-4d4f1f2096f06d69a6f205f0d8e33d4398f25677.zip |
libbb/sha1: x86_64 version: bswap in 64-bit chunks

    function                 old     new   delta
    sha1_process_block64    3562    3570      +8

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'libbb')
-rw-r--r-- | libbb/Config.src | 2 | ||||
-rw-r--r-- | libbb/hash_md5_sha.c | 42 |
2 files changed, 23 insertions, 21 deletions
diff --git a/libbb/Config.src b/libbb/Config.src index f66f65f81..42a2283aa 100644 --- a/libbb/Config.src +++ b/libbb/Config.src | |||
@@ -59,7 +59,7 @@ config SHA1_SMALL | |||
59 | Trade binary size versus speed for the sha1 algorithm. | 59 | Trade binary size versus speed for the sha1 algorithm. |
60 | throughput MB/s size of sha1_process_block64 | 60 | throughput MB/s size of sha1_process_block64 |
61 | value 486 x86-64 486 x86-64 | 61 | value 486 x86-64 486 x86-64 |
62 | 0 367 367 3657 3562 | 62 | 0 367 367 3657 3570 |
63 | 1 224 229 654 732 | 63 | 1 224 229 654 732 |
64 | 2,3 200 195 358 380 | 64 | 2,3 200 195 358 380 |
65 | 65 | ||
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index a4e36066a..959bfc951 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c | |||
@@ -867,27 +867,29 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) | |||
867 | .endif \n\ | 867 | .endif \n\ |
868 | .endm \n\ | 868 | .endm \n\ |
869 | \n\ | 869 | \n\ |
870 | movl 4*8(%rdi), %r8d \n\ | 870 | movq 4*8(%rdi), %r8 \n\ |
871 | bswap %r8d \n\ | 871 | bswap %r8 \n\ |
872 | movl 4*9(%rdi), %r9d \n\ | 872 | movl %r8d, %r9d \n\ |
873 | bswap %r9d \n\ | 873 | shrq $32, %r8 \n\ |
874 | movl 4*10(%rdi), %r10d \n\ | 874 | movq 4*10(%rdi), %r10 \n\ |
875 | bswap %r10d \n\ | 875 | bswap %r10 \n\ |
876 | movl 4*11(%rdi), %r11d \n\ | 876 | movl %r10d, %r11d \n\ |
877 | bswap %r11d \n\ | 877 | shrq $32, %r10 \n\ |
878 | movl 4*12(%rdi), %r12d \n\ | 878 | movq 4*12(%rdi), %r12 \n\ |
879 | bswap %r12d \n\ | 879 | bswap %r12 \n\ |
880 | movl 4*13(%rdi), %r13d \n\ | 880 | movl %r12d, %r13d \n\ |
881 | bswap %r13d \n\ | 881 | shrq $32, %r12 \n\ |
882 | movl 4*14(%rdi), %r14d \n\ | 882 | movq 4*14(%rdi), %r14 \n\ |
883 | bswap %r14d \n\ | 883 | bswap %r14 \n\ |
884 | movl 4*15(%rdi), %r15d \n\ | 884 | movl %r14d, %r15d \n\ |
885 | bswap %r15d \n\ | 885 | shrq $32, %r14 \n\ |
886 | movl $7, %eax \n\ | 886 | \n\ |
887 | movl $3, %eax \n\ | ||
887 | 1: \n\ | 888 | 1: \n\ |
888 | movl (%rdi,%rax,4), %esi \n\ | 889 | movq (%rdi,%rax,8), %rsi \n\ |
889 | bswap %esi \n\ | 890 | bswap %rsi \n\ |
890 | movl %esi, -32(%rsp,%rax,4) \n\ | 891 | rolq $32, %rsi \n\ |
892 | movq %rsi, -32(%rsp,%rax,8) \n\ | ||
891 | decl %eax \n\ | 893 | decl %eax \n\ |
892 | jns 1b \n\ | 894 | jns 1b \n\ |
893 | movl 80(%rdi), %eax # a = ctx->hash[0] \n\ | 895 | movl 80(%rdi), %eax # a = ctx->hash[0] \n\ |