aboutsummaryrefslogtreecommitdiff
path: root/libbb
diff options
context:
space:
mode:
author Denys Vlasenko <vda.linux@googlemail.com> 2022-01-01 15:42:15 +0100
committer Denys Vlasenko <vda.linux@googlemail.com> 2022-01-01 15:42:15 +0100
commit 4d4f1f2096f06d69a6f205f0d8e33d4398f25677 (patch)
tree f3ab167117ee36b55d98ddd6cc49eb087de64b0b /libbb
parent d643010feeef312c77d7f51c3dd476d4e605c982 (diff)
download busybox-w32-4d4f1f2096f06d69a6f205f0d8e33d4398f25677.tar.gz
busybox-w32-4d4f1f2096f06d69a6f205f0d8e33d4398f25677.tar.bz2
busybox-w32-4d4f1f2096f06d69a6f205f0d8e33d4398f25677.zip
libbb/sha1: x86_64 version: bswap in 64-bit chunks
function                                             old     new   delta
sha1_process_block64                                3562    3570      +8

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'libbb')
-rw-r--r-- libbb/Config.src 2
-rw-r--r-- libbb/hash_md5_sha.c 42
2 files changed, 23 insertions, 21 deletions
diff --git a/libbb/Config.src b/libbb/Config.src
index f66f65f81..42a2283aa 100644
--- a/libbb/Config.src
+++ b/libbb/Config.src
@@ -59,7 +59,7 @@ config SHA1_SMALL
59 Trade binary size versus speed for the sha1 algorithm. 59 Trade binary size versus speed for the sha1 algorithm.
60 throughput MB/s size of sha1_process_block64 60 throughput MB/s size of sha1_process_block64
61 value 486 x86-64 486 x86-64 61 value 486 x86-64 486 x86-64
62 0 367 367 3657 3562 62 0 367 367 3657 3570
63 1 224 229 654 732 63 1 224 229 654 732
64 2,3 200 195 358 380 64 2,3 200 195 358 380
65 65
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c
index a4e36066a..959bfc951 100644
--- a/libbb/hash_md5_sha.c
+++ b/libbb/hash_md5_sha.c
@@ -867,27 +867,29 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM)
867 .endif \n\ 867 .endif \n\
868 .endm \n\ 868 .endm \n\
869 \n\ 869 \n\
870 movl 4*8(%rdi), %r8d \n\ 870 movq 4*8(%rdi), %r8 \n\
871 bswap %r8d \n\ 871 bswap %r8 \n\
872 movl 4*9(%rdi), %r9d \n\ 872 movl %r8d, %r9d \n\
873 bswap %r9d \n\ 873 shrq $32, %r8 \n\
874 movl 4*10(%rdi), %r10d \n\ 874 movq 4*10(%rdi), %r10 \n\
875 bswap %r10d \n\ 875 bswap %r10 \n\
876 movl 4*11(%rdi), %r11d \n\ 876 movl %r10d, %r11d \n\
877 bswap %r11d \n\ 877 shrq $32, %r10 \n\
878 movl 4*12(%rdi), %r12d \n\ 878 movq 4*12(%rdi), %r12 \n\
879 bswap %r12d \n\ 879 bswap %r12 \n\
880 movl 4*13(%rdi), %r13d \n\ 880 movl %r12d, %r13d \n\
881 bswap %r13d \n\ 881 shrq $32, %r12 \n\
882 movl 4*14(%rdi), %r14d \n\ 882 movq 4*14(%rdi), %r14 \n\
883 bswap %r14d \n\ 883 bswap %r14 \n\
884 movl 4*15(%rdi), %r15d \n\ 884 movl %r14d, %r15d \n\
885 bswap %r15d \n\ 885 shrq $32, %r14 \n\
886 movl $7, %eax \n\ 886 \n\
887 movl $3, %eax \n\
887 1: \n\ 888 1: \n\
888 movl (%rdi,%rax,4), %esi \n\ 889 movq (%rdi,%rax,8), %rsi \n\
889 bswap %esi \n\ 890 bswap %rsi \n\
890 movl %esi, -32(%rsp,%rax,4) \n\ 891 rolq $32, %rsi \n\
892 movq %rsi, -32(%rsp,%rax,8) \n\
891 decl %eax \n\ 893 decl %eax \n\
892 jns 1b \n\ 894 jns 1b \n\
893 movl 80(%rdi), %eax # a = ctx->hash[0] \n\ 895 movl 80(%rdi), %eax # a = ctx->hash[0] \n\