diff options
author | Denys Vlasenko <vda.linux@googlemail.com> | 2022-01-03 01:57:29 +0100 |
---|---|---|
committer | Denys Vlasenko <vda.linux@googlemail.com> | 2022-01-03 12:57:36 +0100 |
commit | 05fd13ebec869fc5e6f226481a2405a2685e8db1 (patch) | |
tree | bd112a629c547af9bb2a974d1b47fd5f193c3813 /libbb/hash_md5_sha.c | |
parent | 5c0c5582319a5123635c9fd62f8e99ef01cceb3f (diff) | |
download | busybox-w32-05fd13ebec869fc5e6f226481a2405a2685e8db1.tar.gz busybox-w32-05fd13ebec869fc5e6f226481a2405a2685e8db1.tar.bz2 busybox-w32-05fd13ebec869fc5e6f226481a2405a2685e8db1.zip |
libbb/sha1: x86_64 version: move to a separate .S file, no code changes
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Diffstat (limited to 'libbb/hash_md5_sha.c')
-rw-r--r-- | libbb/hash_md5_sha.c | 392 |
1 file changed, 3 insertions, 389 deletions
diff --git a/libbb/hash_md5_sha.c b/libbb/hash_md5_sha.c index 7eca3de4d..ee19c1cb7 100644 --- a/libbb/hash_md5_sha.c +++ b/libbb/hash_md5_sha.c | |||
@@ -696,397 +696,11 @@ static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) | |||
696 | #undef RCONST | 696 | #undef RCONST |
697 | } | 697 | } |
698 | # elif defined(__GNUC__) && defined(__x86_64__) | 698 | # elif defined(__GNUC__) && defined(__x86_64__) |
699 | static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM) | ||
700 | { | ||
701 | BUILD_BUG_ON(offsetof(sha1_ctx_t, hash) != 80); | ||
702 | asm( | ||
703 | "\n\ | ||
704 | pushq %r15 # \n\ | ||
705 | pushq %r14 # \n\ | ||
706 | pushq %r13 # \n\ | ||
707 | pushq %r12 # \n\ | ||
708 | pushq %rbp # \n\ | ||
709 | pushq %rbx # \n\ | ||
710 | pushq %rdi # we need ctx at the end \n\ | ||
711 | \n\ | ||
712 | #Register and stack use: \n\ | ||
713 | # eax..edx: a..d \n\ | ||
714 | # ebp: e \n\ | ||
715 | # esi,edi: temps \n\ | ||
716 | # -32+4*n(%rsp),r8...r15: W[0..7,8..15] \n\ | ||
717 | .macro loadW n,r \n\ | ||
718 | .if \\n == 0 \n\ | ||
719 | movl -32+4*0(%rsp),\\r \n\ | ||
720 | .endif \n\ | ||
721 | .if \\n == 1 \n\ | ||
722 | movl -32+4*1(%rsp),\\r \n\ | ||
723 | .endif \n\ | ||
724 | .if \\n == 2 \n\ | ||
725 | movl -32+4*2(%rsp),\\r \n\ | ||
726 | .endif \n\ | ||
727 | .if \\n == 3 \n\ | ||
728 | movl -32+4*3(%rsp),\\r \n\ | ||
729 | .endif \n\ | ||
730 | .if \\n == 4 \n\ | ||
731 | movl -32+4*4(%rsp),\\r \n\ | ||
732 | .endif \n\ | ||
733 | .if \\n == 5 \n\ | ||
734 | movl -32+4*5(%rsp),\\r \n\ | ||
735 | .endif \n\ | ||
736 | .if \\n == 6 \n\ | ||
737 | movl -32+4*6(%rsp),\\r \n\ | ||
738 | .endif \n\ | ||
739 | .if \\n == 7 \n\ | ||
740 | movl -32+4*7(%rsp),\\r \n\ | ||
741 | .endif \n\ | ||
742 | .if \\n == 8 \n\ | ||
743 | movl %r8d,\\r \n\ | ||
744 | .endif \n\ | ||
745 | .if \\n == 9 \n\ | ||
746 | movl %r9d,\\r \n\ | ||
747 | .endif \n\ | ||
748 | .if \\n == 10 \n\ | ||
749 | movl %r10d,\\r \n\ | ||
750 | .endif \n\ | ||
751 | .if \\n == 11 \n\ | ||
752 | movl %r11d,\\r \n\ | ||
753 | .endif \n\ | ||
754 | .if \\n == 12 \n\ | ||
755 | movl %r12d,\\r \n\ | ||
756 | .endif \n\ | ||
757 | .if \\n == 13 \n\ | ||
758 | movl %r13d,\\r \n\ | ||
759 | .endif \n\ | ||
760 | .if \\n == 14 \n\ | ||
761 | movl %r14d,\\r \n\ | ||
762 | .endif \n\ | ||
763 | .if \\n == 15 \n\ | ||
764 | movl %r15d,\\r \n\ | ||
765 | .endif \n\ | ||
766 | .endm \n\ | ||
767 | \n\ | ||
768 | .macro storeW r,n \n\ | ||
769 | .if \\n == 0 \n\ | ||
770 | movl \\r,-32+4*0(%rsp) \n\ | ||
771 | .endif \n\ | ||
772 | .if \\n == 1 \n\ | ||
773 | movl \\r,-32+4*1(%rsp) \n\ | ||
774 | .endif \n\ | ||
775 | .if \\n == 2 \n\ | ||
776 | movl \\r,-32+4*2(%rsp) \n\ | ||
777 | .endif \n\ | ||
778 | .if \\n == 3 \n\ | ||
779 | movl \\r,-32+4*3(%rsp) \n\ | ||
780 | .endif \n\ | ||
781 | .if \\n == 4 \n\ | ||
782 | movl \\r,-32+4*4(%rsp) \n\ | ||
783 | .endif \n\ | ||
784 | .if \\n == 5 \n\ | ||
785 | movl \\r,-32+4*5(%rsp) \n\ | ||
786 | .endif \n\ | ||
787 | .if \\n == 6 \n\ | ||
788 | movl \\r,-32+4*6(%rsp) \n\ | ||
789 | .endif \n\ | ||
790 | .if \\n == 7 \n\ | ||
791 | movl \\r,-32+4*7(%rsp) \n\ | ||
792 | .endif \n\ | ||
793 | .if \\n == 8 \n\ | ||
794 | movl \\r,%r8d \n\ | ||
795 | .endif \n\ | ||
796 | .if \\n == 9 \n\ | ||
797 | movl \\r,%r9d \n\ | ||
798 | .endif \n\ | ||
799 | .if \\n == 10 \n\ | ||
800 | movl \\r,%r10d \n\ | ||
801 | .endif \n\ | ||
802 | .if \\n == 11 \n\ | ||
803 | movl \\r,%r11d \n\ | ||
804 | .endif \n\ | ||
805 | .if \\n == 12 \n\ | ||
806 | movl \\r,%r12d \n\ | ||
807 | .endif \n\ | ||
808 | .if \\n == 13 \n\ | ||
809 | movl \\r,%r13d \n\ | ||
810 | .endif \n\ | ||
811 | .if \\n == 14 \n\ | ||
812 | movl \\r,%r14d \n\ | ||
813 | .endif \n\ | ||
814 | .if \\n == 15 \n\ | ||
815 | movl \\r,%r15d \n\ | ||
816 | .endif \n\ | ||
817 | .endm \n\ | ||
818 | \n\ | ||
819 | .macro xorW n,r \n\ | ||
820 | .if \\n == 0 \n\ | ||
821 | xorl -32+4*0(%rsp),\\r \n\ | ||
822 | .endif \n\ | ||
823 | .if \\n == 1 \n\ | ||
824 | xorl -32+4*1(%rsp),\\r \n\ | ||
825 | .endif \n\ | ||
826 | .if \\n == 2 \n\ | ||
827 | xorl -32+4*2(%rsp),\\r \n\ | ||
828 | .endif \n\ | ||
829 | .if \\n == 3 \n\ | ||
830 | xorl -32+4*3(%rsp),\\r \n\ | ||
831 | .endif \n\ | ||
832 | .if \\n == 4 \n\ | ||
833 | xorl -32+4*4(%rsp),\\r \n\ | ||
834 | .endif \n\ | ||
835 | .if \\n == 5 \n\ | ||
836 | xorl -32+4*5(%rsp),\\r \n\ | ||
837 | .endif \n\ | ||
838 | .if \\n == 6 \n\ | ||
839 | xorl -32+4*6(%rsp),\\r \n\ | ||
840 | .endif \n\ | ||
841 | .if \\n == 7 \n\ | ||
842 | xorl -32+4*7(%rsp),\\r \n\ | ||
843 | .endif \n\ | ||
844 | .if \\n == 8 \n\ | ||
845 | xorl %r8d,\\r \n\ | ||
846 | .endif \n\ | ||
847 | .if \\n == 9 \n\ | ||
848 | xorl %r9d,\\r \n\ | ||
849 | .endif \n\ | ||
850 | .if \\n == 10 \n\ | ||
851 | xorl %r10d,\\r \n\ | ||
852 | .endif \n\ | ||
853 | .if \\n == 11 \n\ | ||
854 | xorl %r11d,\\r \n\ | ||
855 | .endif \n\ | ||
856 | .if \\n == 12 \n\ | ||
857 | xorl %r12d,\\r \n\ | ||
858 | .endif \n\ | ||
859 | .if \\n == 13 \n\ | ||
860 | xorl %r13d,\\r \n\ | ||
861 | .endif \n\ | ||
862 | .if \\n == 14 \n\ | ||
863 | xorl %r14d,\\r \n\ | ||
864 | .endif \n\ | ||
865 | .if \\n == 15 \n\ | ||
866 | xorl %r15d,\\r \n\ | ||
867 | .endif \n\ | ||
868 | .endm \n\ | ||
869 | \n\ | ||
870 | movq 4*8(%rdi), %r8 \n\ | ||
871 | bswap %r8 \n\ | ||
872 | movl %r8d, %r9d \n\ | ||
873 | shrq $32, %r8 \n\ | ||
874 | movq 4*10(%rdi), %r10 \n\ | ||
875 | bswap %r10 \n\ | ||
876 | movl %r10d, %r11d \n\ | ||
877 | shrq $32, %r10 \n\ | ||
878 | movq 4*12(%rdi), %r12 \n\ | ||
879 | bswap %r12 \n\ | ||
880 | movl %r12d, %r13d \n\ | ||
881 | shrq $32, %r12 \n\ | ||
882 | movq 4*14(%rdi), %r14 \n\ | ||
883 | bswap %r14 \n\ | ||
884 | movl %r14d, %r15d \n\ | ||
885 | shrq $32, %r14 \n\ | ||
886 | \n\ | ||
887 | movl $3, %eax \n\ | ||
888 | 1: \n\ | ||
889 | movq (%rdi,%rax,8), %rsi \n\ | ||
890 | bswap %rsi \n\ | ||
891 | rolq $32, %rsi \n\ | ||
892 | movq %rsi, -32(%rsp,%rax,8) \n\ | ||
893 | decl %eax \n\ | ||
894 | jns 1b \n\ | ||
895 | movl 80(%rdi), %eax # a = ctx->hash[0] \n\ | ||
896 | movl 84(%rdi), %ebx # b = ctx->hash[1] \n\ | ||
897 | movl 88(%rdi), %ecx # c = ctx->hash[2] \n\ | ||
898 | movl 92(%rdi), %edx # d = ctx->hash[3] \n\ | ||
899 | movl 96(%rdi), %ebp # e = ctx->hash[4] \n\ | ||
900 | " | ||
901 | #define RD1As(a,b,c,d,e, n, RCONST) \ | ||
902 | "\n\ | ||
903 | ##loadW "n", %esi # n=0, W[0] already in %esi \n\ | ||
904 | movl %e"c", %edi # c \n\ | ||
905 | xorl %e"d", %edi # ^d \n\ | ||
906 | andl %e"b", %edi # &b \n\ | ||
907 | xorl %e"d", %edi # (((c ^ d) & b) ^ d) \n\ | ||
908 | leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + W[n] \n\ | ||
909 | addl %edi, %e"e" # e += (((c ^ d) & b) ^ d) \n\ | ||
910 | movl %e"a", %esi # \n\ | ||
911 | roll $5, %esi # rotl32(a,5) \n\ | ||
912 | addl %esi, %e"e" # e += rotl32(a,5) \n\ | ||
913 | rorl $2, %e"b" # b = rotl32(b,30) \n\ | ||
914 | " | ||
915 | #define RD1Bs(a,b,c,d,e, n, RCONST) \ | ||
916 | "\n\ | ||
917 | loadW "n", %esi # W[n] \n\ | ||
918 | movl %e"c", %edi # c \n\ | ||
919 | xorl %e"d", %edi # ^d \n\ | ||
920 | andl %e"b", %edi # &b \n\ | ||
921 | xorl %e"d", %edi # (((c ^ d) & b) ^ d) \n\ | ||
922 | leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + W[n] \n\ | ||
923 | addl %edi, %e"e" # e += (((c ^ d) & b) ^ d) \n\ | ||
924 | movl %e"a", %esi # \n\ | ||
925 | roll $5, %esi # rotl32(a,5) \n\ | ||
926 | addl %esi, %e"e" # e += rotl32(a,5) \n\ | ||
927 | rorl $2, %e"b" # b = rotl32(b,30) \n\ | ||
928 | " | ||
929 | #define RD1Cs(a,b,c,d,e, n, RCONST) \ | ||
930 | "\n\ | ||
931 | movl %e"c", %edi # c \n\ | ||
932 | xorl %e"d", %edi # ^d \n\ | ||
933 | andl %e"b", %edi # &b \n\ | ||
934 | xorl %e"d", %edi # (((c ^ d) & b) ^ d) \n\ | ||
935 | leal "RCONST"(%r"e",%r"n"), %e"e" # e += RCONST + W[n] \n\ | ||
936 | addl %edi, %e"e" # e += (((c ^ d) & b) ^ d) \n\ | ||
937 | movl %e"a", %esi # \n\ | ||
938 | roll $5, %esi # rotl32(a,5) \n\ | ||
939 | addl %esi, %e"e" # e += rotl32(a,5) \n\ | ||
940 | rorl $2, %e"b" # b = rotl32(b,30) \n\ | ||
941 | " | ||
942 | #define RD1Ds(a,b,c,d,e, n13,n8,n2,n, RCONST) \ | ||
943 | "\n\ | ||
944 | loadW "n13", %esi # W[(n+13) & 15] \n\ | ||
945 | xorW "n8", %esi # ^W[(n+8) & 15] \n\ | ||
946 | xorW "n2", %esi # ^W[(n+2) & 15] \n\ | ||
947 | xorW "n", %esi # ^W[n & 15] \n\ | ||
948 | roll %esi # \n\ | ||
949 | storeW %esi, "n" # store to W[n & 15] \n\ | ||
950 | movl %e"c", %edi # c \n\ | ||
951 | xorl %e"d", %edi # ^d \n\ | ||
952 | andl %e"b", %edi # &b \n\ | ||
953 | xorl %e"d", %edi # (((c ^ d) & b) ^ d) \n\ | ||
954 | leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ | ||
955 | addl %edi, %e"e" # e += (((c ^ d) & b) ^ d) \n\ | ||
956 | movl %e"a", %esi # \n\ | ||
957 | roll $5, %esi # rotl32(a,5) \n\ | ||
958 | addl %esi, %e"e" # e += rotl32(a,5) \n\ | ||
959 | rorl $2, %e"b" # b = rotl32(b,30) \n\ | ||
960 | " | ||
961 | #define RD1A(a,b,c,d,e, n) RD1As(STR(a),STR(b),STR(c),STR(d),STR(e), STR(n), STR(RCONST)) | ||
962 | #define RD1B(a,b,c,d,e, n) RD1Bs(STR(a),STR(b),STR(c),STR(d),STR(e), STR(n), STR(RCONST)) | ||
963 | #define RD1C(a,b,c,d,e, n) RD1Cs(STR(a),STR(b),STR(c),STR(d),STR(e), STR(n), STR(RCONST)) | ||
964 | #define RD1D(a,b,c,d,e, n) RD1Ds(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((n+13)&15)), STR(((n+8)&15)), STR(((n+2)&15)), STR(((n)&15)), STR(RCONST)) | ||
965 | #undef RCONST | ||
966 | #define RCONST 0x5A827999 | ||
967 | RD1A(ax,bx,cx,dx,bp, 0) RD1B(bp,ax,bx,cx,dx, 1) RD1B(dx,bp,ax,bx,cx, 2) RD1B(cx,dx,bp,ax,bx, 3) RD1B(bx,cx,dx,bp,ax, 4) | ||
968 | RD1B(ax,bx,cx,dx,bp, 5) RD1B(bp,ax,bx,cx,dx, 6) RD1B(dx,bp,ax,bx,cx, 7) RD1C(cx,dx,bp,ax,bx, 8) RD1C(bx,cx,dx,bp,ax, 9) | ||
969 | RD1C(ax,bx,cx,dx,bp,10) RD1C(bp,ax,bx,cx,dx,11) RD1C(dx,bp,ax,bx,cx,12) RD1C(cx,dx,bp,ax,bx,13) RD1C(bx,cx,dx,bp,ax,14) | ||
970 | RD1C(ax,bx,cx,dx,bp,15) RD1D(bp,ax,bx,cx,dx,16) RD1D(dx,bp,ax,bx,cx,17) RD1D(cx,dx,bp,ax,bx,18) RD1D(bx,cx,dx,bp,ax,19) | ||
971 | #define RD2s(a,b,c,d,e, n13,n8,n2,n, RCONST) \ | ||
972 | "\n\ | ||
973 | loadW "n13", %esi # W[(n+13) & 15] \n\ | ||
974 | xorW "n8", %esi # ^W[(n+8) & 15] \n\ | ||
975 | xorW "n2", %esi # ^W[(n+2) & 15] \n\ | ||
976 | xorW "n", %esi # ^W[n & 15] \n\ | ||
977 | roll %esi # \n\ | ||
978 | storeW %esi, "n" # store to W[n & 15] \n\ | ||
979 | movl %e"c", %edi # c \n\ | ||
980 | xorl %e"d", %edi # ^d \n\ | ||
981 | xorl %e"b", %edi # ^b \n\ | ||
982 | leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ | ||
983 | addl %edi, %e"e" # e += (c ^ d ^ b) \n\ | ||
984 | movl %e"a", %esi # \n\ | ||
985 | roll $5, %esi # rotl32(a,5) \n\ | ||
986 | addl %esi, %e"e" # e += rotl32(a,5) \n\ | ||
987 | rorl $2, %e"b" # b = rotl32(b,30) \n\ | ||
988 | " | ||
989 | #define RD2(a,b,c,d,e, n) RD2s(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((20+n+13)&15)), STR(((20+n+8)&15)), STR(((20+n+2)&15)), STR(((20+n)&15)), STR(RCONST)) | ||
990 | #undef RCONST | ||
991 | #define RCONST 0x6ED9EBA1 | ||
992 | RD2(ax,bx,cx,dx,bp, 0) RD2(bp,ax,bx,cx,dx, 1) RD2(dx,bp,ax,bx,cx, 2) RD2(cx,dx,bp,ax,bx, 3) RD2(bx,cx,dx,bp,ax, 4) | ||
993 | RD2(ax,bx,cx,dx,bp, 5) RD2(bp,ax,bx,cx,dx, 6) RD2(dx,bp,ax,bx,cx, 7) RD2(cx,dx,bp,ax,bx, 8) RD2(bx,cx,dx,bp,ax, 9) | ||
994 | RD2(ax,bx,cx,dx,bp,10) RD2(bp,ax,bx,cx,dx,11) RD2(dx,bp,ax,bx,cx,12) RD2(cx,dx,bp,ax,bx,13) RD2(bx,cx,dx,bp,ax,14) | ||
995 | RD2(ax,bx,cx,dx,bp,15) RD2(bp,ax,bx,cx,dx,16) RD2(dx,bp,ax,bx,cx,17) RD2(cx,dx,bp,ax,bx,18) RD2(bx,cx,dx,bp,ax,19) | ||
996 | |||
997 | #define RD3s(a,b,c,d,e, n13,n8,n2,n, RCONST) \ | ||
998 | "\n\ | ||
999 | movl %e"b", %edi # di: b \n\ | ||
1000 | movl %e"b", %esi # si: b \n\ | ||
1001 | orl %e"c", %edi # di: b | c \n\ | ||
1002 | andl %e"c", %esi # si: b & c \n\ | ||
1003 | andl %e"d", %edi # di: (b | c) & d \n\ | ||
1004 | orl %esi, %edi # ((b | c) & d) | (b & c) \n\ | ||
1005 | loadW "n13", %esi # W[(n+13) & 15] \n\ | ||
1006 | xorW "n8", %esi # ^W[(n+8) & 15] \n\ | ||
1007 | xorW "n2", %esi # ^W[(n+2) & 15] \n\ | ||
1008 | xorW "n", %esi # ^W[n & 15] \n\ | ||
1009 | roll %esi # \n\ | ||
1010 | storeW %esi, "n" # store to W[n & 15] \n\ | ||
1011 | addl %edi, %e"e" # += ((b | c) & d) | (b & c)\n\ | ||
1012 | leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ | ||
1013 | movl %e"a", %esi # \n\ | ||
1014 | roll $5, %esi # rotl32(a,5) \n\ | ||
1015 | addl %esi, %e"e" # e += rotl32(a,5) \n\ | ||
1016 | rorl $2, %e"b" # b = rotl32(b,30) \n\ | ||
1017 | " | ||
1018 | #define RD3(a,b,c,d,e, n) RD3s(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((40+n+13)&15)), STR(((40+n+8)&15)), STR(((40+n+2)&15)), STR(((40+n)&15)), STR(RCONST)) | ||
1019 | #undef RCONST | ||
1020 | //#define RCONST 0x8F1BBCDC "out of range for signed 32bit displacement" | ||
1021 | #define RCONST -0x70e44324 | ||
1022 | RD3(ax,bx,cx,dx,bp, 0) RD3(bp,ax,bx,cx,dx, 1) RD3(dx,bp,ax,bx,cx, 2) RD3(cx,dx,bp,ax,bx, 3) RD3(bx,cx,dx,bp,ax, 4) | ||
1023 | RD3(ax,bx,cx,dx,bp, 5) RD3(bp,ax,bx,cx,dx, 6) RD3(dx,bp,ax,bx,cx, 7) RD3(cx,dx,bp,ax,bx, 8) RD3(bx,cx,dx,bp,ax, 9) | ||
1024 | RD3(ax,bx,cx,dx,bp,10) RD3(bp,ax,bx,cx,dx,11) RD3(dx,bp,ax,bx,cx,12) RD3(cx,dx,bp,ax,bx,13) RD3(bx,cx,dx,bp,ax,14) | ||
1025 | RD3(ax,bx,cx,dx,bp,15) RD3(bp,ax,bx,cx,dx,16) RD3(dx,bp,ax,bx,cx,17) RD3(cx,dx,bp,ax,bx,18) RD3(bx,cx,dx,bp,ax,19) | ||
1026 | 699 | ||
1027 | #define RD4As(a,b,c,d,e, n13,n8,n2,n, RCONST) \ | 700 | /* in hash_md5_sha_x86-64.S */ |
1028 | "\n\ | 701 | struct ASM_expects_80 { char t[1 - 2*(offsetof(sha1_ctx_t, hash) != 80)]; }; |
1029 | loadW "n13", %esi # W[(n+13) & 15] \n\ | 702 | void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx UNUSED_PARAM); |
1030 | xorW "n8", %esi # ^W[(n+8) & 15] \n\ | ||
1031 | xorW "n2", %esi # ^W[(n+2) & 15] \n\ | ||
1032 | xorW "n", %esi # ^W[n & 15] \n\ | ||
1033 | roll %esi # \n\ | ||
1034 | storeW %esi, "n" # store to W[n & 15] \n\ | ||
1035 | movl %e"c", %edi # c \n\ | ||
1036 | xorl %e"d", %edi # ^d \n\ | ||
1037 | xorl %e"b", %edi # ^b \n\ | ||
1038 | leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ | ||
1039 | addl %edi, %e"e" # e += (c ^ d ^ b) \n\ | ||
1040 | movl %e"a", %esi # \n\ | ||
1041 | roll $5, %esi # rotl32(a,5) \n\ | ||
1042 | addl %esi, %e"e" # e += rotl32(a,5) \n\ | ||
1043 | rorl $2, %e"b" # b = rotl32(b,30) \n\ | ||
1044 | " | ||
1045 | #define RD4Bs(a,b,c,d,e, n13,n8,n2,n, RCONST) \ | ||
1046 | "\n\ | ||
1047 | loadW "n13", %esi # W[(n+13) & 15] \n\ | ||
1048 | xorW "n8", %esi # ^W[(n+8) & 15] \n\ | ||
1049 | xorW "n2", %esi # ^W[(n+2) & 15] \n\ | ||
1050 | xorW "n", %esi # ^W[n & 15] \n\ | ||
1051 | roll %esi # \n\ | ||
1052 | #storeW %esi, "n" # store to W[n & 15] elided \n\ | ||
1053 | movl %e"c", %edi # c \n\ | ||
1054 | xorl %e"d", %edi # ^d \n\ | ||
1055 | xorl %e"b", %edi # ^b \n\ | ||
1056 | leal "RCONST"(%r"e",%rsi), %e"e" # e += RCONST + mixed_W \n\ | ||
1057 | addl %edi, %e"e" # e += (c ^ d ^ b) \n\ | ||
1058 | movl %e"a", %esi # \n\ | ||
1059 | roll $5, %esi # rotl32(a,5) \n\ | ||
1060 | addl %esi, %e"e" # e += rotl32(a,5) \n\ | ||
1061 | rorl $2, %e"b" # b = rotl32(b,30) \n\ | ||
1062 | " | ||
1063 | #define RD4A(a,b,c,d,e, n) RD4As(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((60+n+13)&15)), STR(((60+n+8)&15)), STR(((60+n+2)&15)), STR(((60+n)&15)), STR(RCONST)) | ||
1064 | #define RD4B(a,b,c,d,e, n) RD4Bs(STR(a),STR(b),STR(c),STR(d),STR(e), STR(((60+n+13)&15)), STR(((60+n+8)&15)), STR(((60+n+2)&15)), STR(((60+n)&15)), STR(RCONST)) | ||
1065 | #undef RCONST | ||
1066 | //#define RCONST 0xCA62C1D6 "out of range for signed 32bit displacement" | ||
1067 | #define RCONST -0x359d3e2a | ||
1068 | RD4A(ax,bx,cx,dx,bp, 0) RD4A(bp,ax,bx,cx,dx, 1) RD4A(dx,bp,ax,bx,cx, 2) RD4A(cx,dx,bp,ax,bx, 3) RD4A(bx,cx,dx,bp,ax, 4) | ||
1069 | RD4A(ax,bx,cx,dx,bp, 5) RD4A(bp,ax,bx,cx,dx, 6) RD4A(dx,bp,ax,bx,cx, 7) RD4A(cx,dx,bp,ax,bx, 8) RD4A(bx,cx,dx,bp,ax, 9) | ||
1070 | RD4A(ax,bx,cx,dx,bp,10) RD4A(bp,ax,bx,cx,dx,11) RD4A(dx,bp,ax,bx,cx,12) RD4A(cx,dx,bp,ax,bx,13) RD4A(bx,cx,dx,bp,ax,14) | ||
1071 | RD4A(ax,bx,cx,dx,bp,15) RD4A(bp,ax,bx,cx,dx,16) RD4B(dx,bp,ax,bx,cx,17) RD4B(cx,dx,bp,ax,bx,18) RD4B(bx,cx,dx,bp,ax,19) | ||
1072 | 703 | ||
1073 | "\n\ | ||
1074 | popq %rdi # \n\ | ||
1075 | addl %eax, 80(%rdi) # ctx->hash[0] += a \n\ | ||
1076 | addl %ebx, 84(%rdi) # ctx->hash[1] += b \n\ | ||
1077 | addl %ecx, 88(%rdi) # ctx->hash[2] += c \n\ | ||
1078 | addl %edx, 92(%rdi) # ctx->hash[3] += d \n\ | ||
1079 | addl %ebp, 96(%rdi) # ctx->hash[4] += e \n\ | ||
1080 | popq %rbx # \n\ | ||
1081 | popq %rbp # \n\ | ||
1082 | popq %r12 # \n\ | ||
1083 | popq %r13 # \n\ | ||
1084 | popq %r14 # \n\ | ||
1085 | popq %r15 # \n\ | ||
1086 | " | ||
1087 | ); /* asm */ | ||
1088 | #undef RCONST | ||
1089 | } | ||
1090 | # else | 704 | # else |
1091 | /* Fast, fully-unrolled SHA1. +3800 bytes of code on x86. | 705 | /* Fast, fully-unrolled SHA1. +3800 bytes of code on x86. |
1092 | * It seems further speedup can be achieved by handling more than | 706 | * It seems further speedup can be achieved by handling more than |