aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDenys Vlasenko <vda.linux@googlemail.com>2022-01-01 15:01:53 +0100
committerDenys Vlasenko <vda.linux@googlemail.com>2022-01-01 15:01:53 +0100
commitd643010feeef312c77d7f51c3dd476d4e605c982 (patch)
tree16090cd94447527c057f953446e03cc7384c9c4e
parent5f6817020467598868b7d1c9ca477d7ccd66b87d (diff)
downloadbusybox-w32-d643010feeef312c77d7f51c3dd476d4e605c982.tar.gz
busybox-w32-d643010feeef312c77d7f51c3dd476d4e605c982.tar.bz2
busybox-w32-d643010feeef312c77d7f51c3dd476d4e605c982.zip
libbb/sha1: shrink x86_64 version - use r8..15 for W[8..15]
function                                             old     new   delta
sha1_process_block64                                3683    3562    -121

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
-rw-r--r-- libbb/Config.src 2
-rw-r--r-- libbb/hash_md5_sha.c 299
2 files changed, 240 insertions, 61 deletions
diff --git a/libbb/Config.src b/libbb/Config.src
index e027c14a8..f66f65f81 100644
--- a/libbb/Config.src
+++ b/libbb/Config.src
@@ -59,7 +59,7 @@ config SHA1_SMALL
59 Trade binary size versus speed for the sha1 algorithm. 59 Trade binary size versus speed for the sha1 algorithm.
60 throughput MB/s size of sha1_process_block64 60 throughput MB/s size of sha1_process_block64
61 value 486 x86-64 486 x86-64 61 value 486 x86-64 486 x86-64
62 0 367 367 3657 3683 62 0 367 367 3657 3562
63 1 224 229 654 732 63 1 224 229 654 732
64 2,3 200 195 358 380 64 2,3 200 195 358 380
65 65
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c
index 9de30dfe6..a4e36066a 100644
--- a/libbb/hash_md5_sha.c
+++ b/libbb/hash_md5_sha.c
@@ -700,22 +700,194 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM)
700{ 700{
701 BUILD_BUG_ON(offsetof(sha1_ctx_t, hash) != 80); 701 BUILD_BUG_ON(offsetof(sha1_ctx_t, hash) != 80);
702 asm( 702 asm(
703// TODO: store W[] in r8..r15? (r8..r11 are callee-clobbered, no need to save)
704"\n\ 703"\n\
705 ##pushq %r15 # \n\ 704 pushq %r15 # \n\
706 ##pushq %r14 # \n\ 705 pushq %r14 # \n\
707 ##pushq %r13 # \n\ 706 pushq %r13 # \n\
708 ##pushq %r12 # \n\ 707 pushq %r12 # \n\
709 ##pushq %rbp # \n\ 708 pushq %rbp # \n\
710 ##pushq %rbx # \n\ 709 pushq %rbx # \n\
711 movq %rbp, %r8 # callee-saved \n\ 710 pushq %rdi # we need ctx at the end \n\
712 movq %rbx, %r9 # callee-saved \n\ 711 \n\
713 movq %rdi, %r10 # we need ctx at the end \n\ 712#Register and stack use: \n\
714 movl $15, %eax \n\ 713# eax..edx: a..d \n\
714# ebp: e \n\
715# esi,edi: temps \n\
716# -32+4*n(%rsp),r8...r15: W[0..7,8..15] \n\
717 .macro loadW n,r \n\
718 .if \\n == 0 \n\
719 movl -32+4*0(%rsp),\\r \n\
720 .endif \n\
721 .if \\n == 1 \n\
722 movl -32+4*1(%rsp),\\r \n\
723 .endif \n\
724 .if \\n == 2 \n\
725 movl -32+4*2(%rsp),\\r \n\
726 .endif \n\
727 .if \\n == 3 \n\
728 movl -32+4*3(%rsp),\\r \n\
729 .endif \n\
730 .if \\n == 4 \n\
731 movl -32+4*4(%rsp),\\r \n\
732 .endif \n\
733 .if \\n == 5 \n\
734 movl -32+4*5(%rsp),\\r \n\
735 .endif \n\
736 .if \\n == 6 \n\
737 movl -32+4*6(%rsp),\\r \n\
738 .endif \n\
739 .if \\n == 7 \n\
740 movl -32+4*7(%rsp),\\r \n\
741 .endif \n\
742 .if \\n == 8 \n\
743 movl %r8d,\\r \n\
744 .endif \n\
745 .if \\n == 9 \n\
746 movl %r9d,\\r \n\
747 .endif \n\
748 .if \\n == 10 \n\
749 movl %r10d,\\r \n\
750 .endif \n\
751 .if \\n == 11 \n\
752 movl %r11d,\\r \n\
753 .endif \n\
754 .if \\n == 12 \n\
755 movl %r12d,\\r \n\
756 .endif \n\
757 .if \\n == 13 \n\
758 movl %r13d,\\r \n\
759 .endif \n\
760 .if \\n == 14 \n\
761 movl %r14d,\\r \n\
762 .endif \n\
763 .if \\n == 15 \n\
764 movl %r15d,\\r \n\
765 .endif \n\
766 .endm \n\
767 \n\
768 .macro storeW r,n \n\
769 .if \\n == 0 \n\
770 movl \\r,-32+4*0(%rsp) \n\
771 .endif \n\
772 .if \\n == 1 \n\
773 movl \\r,-32+4*1(%rsp) \n\
774 .endif \n\
775 .if \\n == 2 \n\
776 movl \\r,-32+4*2(%rsp) \n\
777 .endif \n\
778 .if \\n == 3 \n\
779 movl \\r,-32+4*3(%rsp) \n\
780 .endif \n\
781 .if \\n == 4 \n\
782 movl \\r,-32+4*4(%rsp) \n\
783 .endif \n\
784 .if \\n == 5 \n\
785 movl \\r,-32+4*5(%rsp) \n\
786 .endif \n\
787 .if \\n == 6 \n\
788 movl \\r,-32+4*6(%rsp) \n\
789 .endif \n\
790 .if \\n == 7 \n\
791 movl \\r,-32+4*7(%rsp) \n\
792 .endif \n\
793 .if \\n == 8 \n\
794 movl \\r,%r8d \n\
795 .endif \n\
796 .if \\n == 9 \n\
797 movl \\r,%r9d \n\
798 .endif \n\
799 .if \\n == 10 \n\
800 movl \\r,%r10d \n\
801 .endif \n\
802 .if \\n == 11 \n\
803 movl \\r,%r11d \n\
804 .endif \n\
805 .if \\n == 12 \n\
806 movl \\r,%r12d \n\
807 .endif \n\
808 .if \\n == 13 \n\
809 movl \\r,%r13d \n\
810 .endif \n\
811 .if \\n == 14 \n\
812 movl \\r,%r14d \n\
813 .endif \n\
814 .if \\n == 15 \n\
815 movl \\r,%r15d \n\
816 .endif \n\
817 .endm \n\
818 \n\
819 .macro xorW n,r \n\
820 .if \\n == 0 \n\
821 xorl -32+4*0(%rsp),\\r \n\
822 .endif \n\
823 .if \\n == 1 \n\
824 xorl -32+4*1(%rsp),\\r \n\
825 .endif \n\
826 .if \\n == 2 \n\
827 xorl -32+4*2(%rsp),\\r \n\
828 .endif \n\
829 .if \\n == 3 \n\
830 xorl -32+4*3(%rsp),\\r \n\
831 .endif \n\
832 .if \\n == 4 \n\
833 xorl -32+4*4(%rsp),\\r \n\
834 .endif \n\
835 .if \\n == 5 \n\
836 xorl -32+4*5(%rsp),\\r \n\
837 .endif \n\
838 .if \\n == 6 \n\
839 xorl -32+4*6(%rsp),\\r \n\
840 .endif \n\
841 .if \\n == 7 \n\
842 xorl -32+4*7(%rsp),\\r \n\
843 .endif \n\
844 .if \\n == 8 \n\
845 xorl %r8d,\\r \n\
846 .endif \n\
847 .if \\n == 9 \n\
848 xorl %r9d,\\r \n\
849 .endif \n\
850 .if \\n == 10 \n\
851 xorl %r10d,\\r \n\
852 .endif \n\
853 .if \\n == 11 \n\
854 xorl %r11d,\\r \n\
855 .endif \n\
856 .if \\n == 12 \n\
857 xorl %r12d,\\r \n\
858 .endif \n\
859 .if \\n == 13 \n\
860 xorl %r13d,\\r \n\
861 .endif \n\
862 .if \\n == 14 \n\
863 xorl %r14d,\\r \n\
864 .endif \n\
865 .if \\n == 15 \n\
866 xorl %r15d,\\r \n\
867 .endif \n\
868 .endm \n\
869 \n\
870 movl 4*8(%rdi), %r8d \n\
871 bswap %r8d \n\
872 movl 4*9(%rdi), %r9d \n\
873 bswap %r9d \n\
874 movl 4*10(%rdi), %r10d \n\
875 bswap %r10d \n\
876 movl 4*11(%rdi), %r11d \n\
877 bswap %r11d \n\
878 movl 4*12(%rdi), %r12d \n\
879 bswap %r12d \n\
880 movl 4*13(%rdi), %r13d \n\
881 bswap %r13d \n\
882 movl 4*14(%rdi), %r14d \n\
883 bswap %r14d \n\
884 movl 4*15(%rdi), %r15d \n\
885 bswap %r15d \n\
886 movl $7, %eax \n\
7151: \n\ 8871: \n\
716 movl (%rdi,%rax,4), %esi \n\ 888 movl (%rdi,%rax,4), %esi \n\
717 bswap %esi \n\ 889 bswap %esi \n\
718 movl %esi, -64(%rsp,%rax,4) \n\ 890 movl %esi, -32(%rsp,%rax,4) \n\
719 decl %eax \n\ 891 decl %eax \n\
720 jns 1b \n\ 892 jns 1b \n\
721 movl 80(%rdi), %eax # a = ctx->hash[0] \n\ 893 movl 80(%rdi), %eax # a = ctx->hash[0] \n\
@@ -723,15 +895,10 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM)
723 movl 88(%rdi), %ecx # c = ctx->hash[2] \n\ 895 movl 88(%rdi), %ecx # c = ctx->hash[2] \n\
724 movl 92(%rdi), %edx # d = ctx->hash[3] \n\ 896 movl 92(%rdi), %edx # d = ctx->hash[3] \n\
725 movl 96(%rdi), %ebp # e = ctx->hash[4] \n\ 897 movl 96(%rdi), %ebp # e = ctx->hash[4] \n\
726#Register and stack use: \n\
727# eax..edx: a..d \n\
728# ebp: e \n\
729# esi,edi: temps \n\
730# -64+4*n(%rsp): W[n] \n\
731" 898"
732#define RD1As(a,b,c,d,e, n, RCONST) \ 899#define RD1As(a,b,c,d,e, n, RCONST) \
733"\n\ 900"\n\
734 ##movl -64+4*"n"(%rsp), %esi # n=0, W[0] already in %esi \n\ 901 ##loadW "n", %esi # n=0, W[0] already in %esi \n\
735 movl %e"c", %edi # c \n\ 902 movl %e"c", %edi # c \n\
736 xorl %e"d", %edi # ^d \n\ 903 xorl %e"d", %edi # ^d \n\
737 andl %e"b", %edi # &b \n\ 904 andl %e"b", %edi # &b \n\
@@ -745,7 +912,7 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM)
745" 912"
746#define RD1Bs(a,b,c,d,e, n, RCONST) \ 913#define RD1Bs(a,b,c,d,e, n, RCONST) \
747"\n\ 914"\n\
748 movl -64+4*"n"(%rsp), %esi # W[n] \n\ 915 loadW "n", %esi # W[n] \n\
749 movl %e"c", %edi # c \n\ 916 movl %e"c", %edi # c \n\
750 xorl %e"d", %edi # ^d \n\ 917 xorl %e"d", %edi # ^d \n\
751 andl %e"b", %edi # &b \n\ 918 andl %e"b", %edi # &b \n\
@@ -757,14 +924,27 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM)
757 addl %esi, %e"e" # e += rotl32(a,5) \n\ 924 addl %esi, %e"e" # e += rotl32(a,5) \n\
758 rorl $2, %e"b" # b = rotl32(b,30) \n\ 925 rorl $2, %e"b" # b = rotl32(b,30) \n\
759" 926"
760#define RD1Cs(a,b,c,d,e, n13,n8,n2,n, RCONST) \ 927#define RD1Cs(a,b,c,d,e, n, RCONST) \
761"\n\ 928"\n\
762 movl -64+4*"n13"(%rsp), %esi # W[(n+13) & 15] \n\ 929 movl %e"c", %edi # c \n\
763 xorl -64+4*"n8"(%rsp), %esi # ^W[(n+8) & 15] \n\ 930 xorl %e"d", %edi # ^d \n\
764 xorl -64+4*"n2"(%rsp), %esi # ^W[(n+2) & 15] \n\ 931 andl %e"b", %edi # &b \n\
765 xorl -64+4*"n"(%rsp), %esi # ^W[n & 15] \n\ 932 xorl %e"d", %edi # (((c ^ d) & b) ^ d) \n\
933 leal "RCONST"(%r"e",%r"n"), %e"e" # e += RCONST + W[n] \n\
934 addl %edi, %e"e" # e += (((c ^ d) & b) ^ d) \n\
935 movl %e"a", %esi # \n\
936 roll $5, %esi # rotl32(a,5) \n\
937 addl %esi, %e"e" # e += rotl32(a,5) \n\
938 rorl $2, %e"b" # b = rotl32(b,30) \n\
939"
940#define RD1Ds(a,b,c,d,e, n13,n8,n2,n, RCONST) \
941"\n\
942 loadW "n13", %esi # W[(n+13) & 15] \n\
943 xorW "n8", %esi # ^W[(n+8) & 15] \n\
944 xorW "n2", %esi # ^W[(n+2) & 15] \n\
945 xorW "n", %esi # ^W[n & 15] \n\
766 roll %esi # \n\ 946 roll %esi # \n\
767 movl %esi, -64+4*"n"(%rsp) # store to W[n & 15] \n\ 947 storeW %esi, "n" # store to W[n & 15] \n\
768 movl %e"c", %edi # c \n\ 948 movl %e"c", %edi # c \n\
769 xorl %e"d", %edi # ^d \n\ 949 xorl %e"d", %edi # ^d \n\
770 andl %e"b", %edi # &b \n\ 950 andl %e"b", %edi # &b \n\
@@ -776,23 +956,24 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM)
776 addl %esi, %e"e" # e += rotl32(a,5) \n\ 956 addl %esi, %e"e" # e += rotl32(a,5) \n\
777 rorl $2, %e"b" # b = rotl32(b,30) \n\ 957 rorl $2, %e"b" # b = rotl32(b,30) \n\
778" 958"
779#define RD1A(a,b,c,d,e, n) RD1As(STR(a),STR(b),STR(c),STR(d),STR(e), STR((n)), STR(RCONST)) 959#define RD1A(a,b,c,d,e, n) RD1As(STR(a),STR(b),STR(c),STR(d),STR(e), STR(n), STR(RCONST))
780#define RD1B(a,b,c,d,e, n) RD1Bs(STR(a),STR(b),STR(c),STR(d),STR(e), STR((n)), STR(RCONST)) 960#define RD1B(a,b,c,d,e, n) RD1Bs(STR(a),STR(b),STR(c),STR(d),STR(e), STR(n), STR(RCONST))
781#define RD1C(a,b,c,d,e, n) RD1Cs(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((n+13)&15)), STR(((n+8)&15)), STR(((n+2)&15)), STR(((n)&15)), STR(RCONST)) 961#define RD1C(a,b,c,d,e, n) RD1Cs(STR(a),STR(b),STR(c),STR(d),STR(e), STR(n), STR(RCONST))
962#define RD1D(a,b,c,d,e, n) RD1Ds(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((n+13)&15)), STR(((n+8)&15)), STR(((n+2)&15)), STR(((n)&15)), STR(RCONST))
782#undef RCONST 963#undef RCONST
783#define RCONST 0x5A827999 964#define RCONST 0x5A827999
784 RD1A(ax,bx,cx,dx,bp, 0) RD1B(bp,ax,bx,cx,dx, 1) RD1B(dx,bp,ax,bx,cx, 2) RD1B(cx,dx,bp,ax,bx, 3) RD1B(bx,cx,dx,bp,ax, 4) 965 RD1A(ax,bx,cx,dx,bp, 0) RD1B(bp,ax,bx,cx,dx, 1) RD1B(dx,bp,ax,bx,cx, 2) RD1B(cx,dx,bp,ax,bx, 3) RD1B(bx,cx,dx,bp,ax, 4)
785 RD1B(ax,bx,cx,dx,bp, 5) RD1B(bp,ax,bx,cx,dx, 6) RD1B(dx,bp,ax,bx,cx, 7) RD1B(cx,dx,bp,ax,bx, 8) RD1B(bx,cx,dx,bp,ax, 9) 966 RD1B(ax,bx,cx,dx,bp, 5) RD1B(bp,ax,bx,cx,dx, 6) RD1B(dx,bp,ax,bx,cx, 7) RD1C(cx,dx,bp,ax,bx, 8) RD1C(bx,cx,dx,bp,ax, 9)
786 RD1B(ax,bx,cx,dx,bp,10) RD1B(bp,ax,bx,cx,dx,11) RD1B(dx,bp,ax,bx,cx,12) RD1B(cx,dx,bp,ax,bx,13) RD1B(bx,cx,dx,bp,ax,14) 967 RD1C(ax,bx,cx,dx,bp,10) RD1C(bp,ax,bx,cx,dx,11) RD1C(dx,bp,ax,bx,cx,12) RD1C(cx,dx,bp,ax,bx,13) RD1C(bx,cx,dx,bp,ax,14)
787 RD1B(ax,bx,cx,dx,bp,15) RD1C(bp,ax,bx,cx,dx,16) RD1C(dx,bp,ax,bx,cx,17) RD1C(cx,dx,bp,ax,bx,18) RD1C(bx,cx,dx,bp,ax,19) 968 RD1C(ax,bx,cx,dx,bp,15) RD1D(bp,ax,bx,cx,dx,16) RD1D(dx,bp,ax,bx,cx,17) RD1D(cx,dx,bp,ax,bx,18) RD1D(bx,cx,dx,bp,ax,19)
788#define RD2s(a,b,c,d,e, n13,n8,n2,n, RCONST) \ 969#define RD2s(a,b,c,d,e, n13,n8,n2,n, RCONST) \
789"\n\ 970"\n\
790 movl -64+4*"n13"(%rsp), %esi # W[(n+13) & 15] \n\ 971 loadW "n13", %esi # W[(n+13) & 15] \n\
791 xorl -64+4*"n8"(%rsp), %esi # ^W[(n+8) & 15] \n\ 972 xorW "n8", %esi # ^W[(n+8) & 15] \n\
792 xorl -64+4*"n2"(%rsp), %esi # ^W[(n+2) & 15] \n\ 973 xorW "n2", %esi # ^W[(n+2) & 15] \n\
793 xorl -64+4*"n"(%rsp), %esi # ^W[n & 15] \n\ 974 xorW "n", %esi # ^W[n & 15] \n\
794 roll %esi # \n\ 975 roll %esi # \n\
795 movl %esi, -64+4*"n"(%rsp) # store to W[n & 15] \n\ 976 storeW %esi, "n" # store to W[n & 15] \n\
796 movl %e"c", %edi # c \n\ 977 movl %e"c", %edi # c \n\
797 xorl %e"d", %edi # ^d \n\ 978 xorl %e"d", %edi # ^d \n\
798 xorl %e"b", %edi # ^b \n\ 979 xorl %e"b", %edi # ^b \n\
@@ -819,12 +1000,12 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM)
819 andl %e"c", %esi # si: b & c \n\ 1000 andl %e"c", %esi # si: b & c \n\
820 andl %e"d", %edi # di: (b | c) & d \n\ 1001 andl %e"d", %edi # di: (b | c) & d \n\
821 orl %esi, %edi # ((b | c) & d) | (b & c) \n\ 1002 orl %esi, %edi # ((b | c) & d) | (b & c) \n\
822 movl -64+4*"n13"(%rsp), %esi # W[(n+13) & 15] \n\ 1003 loadW "n13", %esi # W[(n+13) & 15] \n\
823 xorl -64+4*"n8"(%rsp), %esi # ^W[(n+8) & 15] \n\ 1004 xorW "n8", %esi # ^W[(n+8) & 15] \n\
824 xorl -64+4*"n2"(%rsp), %esi # ^W[(n+2) & 15] \n\ 1005 xorW "n2", %esi # ^W[(n+2) & 15] \n\
825 xorl -64+4*"n"(%rsp), %esi # ^W[n & 15] \n\ 1006 xorW "n", %esi # ^W[n & 15] \n\
826 roll %esi # \n\ 1007 roll %esi # \n\
827 movl %esi, -64+4*"n"(%rsp) # store to W[n & 15] \n\ 1008 storeW %esi, "n" # store to W[n & 15] \n\
828 addl %edi, %e"e" # += ((b | c) & d) | (b & c)\n\ 1009 addl %edi, %e"e" # += ((b | c) & d) | (b & c)\n\
829 leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ 1010 leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\
830 movl %e"a", %esi # \n\ 1011 movl %e"a", %esi # \n\
@@ -843,12 +1024,12 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM)
843 1024
844#define RD4As(a,b,c,d,e, n13,n8,n2,n, RCONST) \ 1025#define RD4As(a,b,c,d,e, n13,n8,n2,n, RCONST) \
845"\n\ 1026"\n\
846 movl -64+4*"n13"(%rsp), %esi # W[(n+13) & 15] \n\ 1027 loadW "n13", %esi # W[(n+13) & 15] \n\
847 xorl -64+4*"n8"(%rsp), %esi # ^W[(n+8) & 15] \n\ 1028 xorW "n8", %esi # ^W[(n+8) & 15] \n\
848 xorl -64+4*"n2"(%rsp), %esi # ^W[(n+2) & 15] \n\ 1029 xorW "n2", %esi # ^W[(n+2) & 15] \n\
849 xorl -64+4*"n"(%rsp), %esi # ^W[n & 15] \n\ 1030 xorW "n", %esi # ^W[n & 15] \n\
850 roll %esi # \n\ 1031 roll %esi # \n\
851 movl %esi, -64+4*"n"(%rsp) # store to W[n & 15] \n\ 1032 storeW %esi, "n" # store to W[n & 15] \n\
852 movl %e"c", %edi # c \n\ 1033 movl %e"c", %edi # c \n\
853 xorl %e"d", %edi # ^d \n\ 1034 xorl %e"d", %edi # ^d \n\
854 xorl %e"b", %edi # ^b \n\ 1035 xorl %e"b", %edi # ^b \n\
@@ -861,12 +1042,12 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM)
861" 1042"
862#define RD4Bs(a,b,c,d,e, n13,n8,n2,n, RCONST) \ 1043#define RD4Bs(a,b,c,d,e, n13,n8,n2,n, RCONST) \
863"\n\ 1044"\n\
864 movl -64+4*"n13"(%rsp), %esi # W[(n+13) & 15] \n\ 1045 loadW "n13", %esi # W[(n+13) & 15] \n\
865 xorl -64+4*"n8"(%rsp), %esi # ^W[(n+8) & 15] \n\ 1046 xorW "n8", %esi # ^W[(n+8) & 15] \n\
866 xorl -64+4*"n2"(%rsp), %esi # ^W[(n+2) & 15] \n\ 1047 xorW "n2", %esi # ^W[(n+2) & 15] \n\
867 xorl -64+4*"n"(%rsp), %esi # ^W[n & 15] \n\ 1048 xorW "n", %esi # ^W[n & 15] \n\
868 roll %esi # \n\ 1049 roll %esi # \n\
869 ##movl %esi, -64+4*"n"(%rsp) # store to W[n & 15] elided \n\ 1050 #storeW %esi, "n" # store to W[n & 15] elided \n\
870 movl %e"c", %edi # c \n\ 1051 movl %e"c", %edi # c \n\
871 xorl %e"d", %edi # ^d \n\ 1052 xorl %e"d", %edi # ^d \n\
872 xorl %e"b", %edi # ^b \n\ 1053 xorl %e"b", %edi # ^b \n\
@@ -888,20 +1069,18 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM)
888 RD4A(ax,bx,cx,dx,bp,15) RD4A(bp,ax,bx,cx,dx,16) RD4B(dx,bp,ax,bx,cx,17) RD4B(cx,dx,bp,ax,bx,18) RD4B(bx,cx,dx,bp,ax,19) 1069 RD4A(ax,bx,cx,dx,bp,15) RD4A(bp,ax,bx,cx,dx,16) RD4B(dx,bp,ax,bx,cx,17) RD4B(cx,dx,bp,ax,bx,18) RD4B(bx,cx,dx,bp,ax,19)
889 1070
890"\n\ 1071"\n\
891 movq %r10, %rdi # \n\ 1072 popq %rdi # \n\
892 addl %eax, 80(%rdi) # ctx->hash[0] += a \n\ 1073 addl %eax, 80(%rdi) # ctx->hash[0] += a \n\
893 addl %ebx, 84(%rdi) # ctx->hash[1] += b \n\ 1074 addl %ebx, 84(%rdi) # ctx->hash[1] += b \n\
894 addl %ecx, 88(%rdi) # ctx->hash[2] += c \n\ 1075 addl %ecx, 88(%rdi) # ctx->hash[2] += c \n\
895 addl %edx, 92(%rdi) # ctx->hash[3] += d \n\ 1076 addl %edx, 92(%rdi) # ctx->hash[3] += d \n\
896 addl %ebp, 96(%rdi) # ctx->hash[4] += e \n\ 1077 addl %ebp, 96(%rdi) # ctx->hash[4] += e \n\
897 movq %r9, %rbx # callee-saved \n\ 1078 popq %rbx # \n\
898 movq %r8, %rbp # callee-saved \n\ 1079 popq %rbp # \n\
899 ##popq %rbx # \n\ 1080 popq %r12 # \n\
900 ##popq %rbp # \n\ 1081 popq %r13 # \n\
901 ##popq %r12 # \n\ 1082 popq %r14 # \n\
902 ##popq %r13 # \n\ 1083 popq %r15 # \n\
903 ##popq %r14 # \n\
904 ##popq %r15 # \n\
905" 1084"
906 ); /* asm */ 1085 ); /* asm */
907#undef RCONST 1086#undef RCONST