diff options
author | miod <> | 2023-02-01 20:45:04 +0000 |
---|---|---|
committer | miod <> | 2023-02-01 20:45:04 +0000 |
commit | dc4b2cf3e0979364f14ff89a07ffafc47435e2e7 (patch) | |
tree | 58219f5c6538172b9d00f33bf48c9a61419ec7a0 | |
parent | 86f42338b2994b620482c37e3d0d9fc3ba1f523b (diff) | |
download | openbsd-dc4b2cf3e0979364f14ff89a07ffafc47435e2e7.tar.gz openbsd-dc4b2cf3e0979364f14ff89a07ffafc47435e2e7.tar.bz2 openbsd-dc4b2cf3e0979364f14ff89a07ffafc47435e2e7.zip |
Move all data blocks from .text to .rodata, and clean up and homogenize the code
responsible for getting the proper address of those blocks.
ok tb@ jsing@
-rw-r--r-- | src/lib/libcrypto/aes/asm/aes-586.pl | 45 | ||||
-rw-r--r-- | src/lib/libcrypto/aes/asm/aesni-x86.pl | 1 | ||||
-rw-r--r-- | src/lib/libcrypto/aes/asm/vpaes-x86.pl | 34 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/asm/bn-586.pl | 9 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/asm/x86-gf2m.pl | 5 | ||||
-rwxr-xr-x | src/lib/libcrypto/bn/asm/x86-mont.pl | 5 | ||||
-rw-r--r-- | src/lib/libcrypto/camellia/asm/cmll-x86.pl | 40 | ||||
-rw-r--r-- | src/lib/libcrypto/des/asm/des-586.pl | 9 | ||||
-rw-r--r-- | src/lib/libcrypto/modes/asm/ghash-x86.pl | 66 | ||||
-rw-r--r-- | src/lib/libcrypto/perlasm/cbc.pl | 82 | ||||
-rw-r--r-- | src/lib/libcrypto/perlasm/x86gas.pl | 92 | ||||
-rw-r--r-- | src/lib/libcrypto/rc4/asm/rc4-586.pl | 26 | ||||
-rw-r--r-- | src/lib/libcrypto/sha/asm/sha1-586.pl | 26 | ||||
-rw-r--r-- | src/lib/libcrypto/sha/asm/sha256-586.pl | 14 | ||||
-rw-r--r-- | src/lib/libcrypto/sha/asm/sha512-586.pl | 18 | ||||
-rw-r--r-- | src/lib/libcrypto/whrlpool/asm/wp-mmx.pl | 12 |
16 files changed, 248 insertions, 236 deletions
diff --git a/src/lib/libcrypto/aes/asm/aes-586.pl b/src/lib/libcrypto/aes/asm/aes-586.pl index c5ae3f6903..4e0f34cba3 100644 --- a/src/lib/libcrypto/aes/asm/aes-586.pl +++ b/src/lib/libcrypto/aes/asm/aes-586.pl | |||
@@ -950,8 +950,10 @@ sub enclast() | |||
950 | &xor ($s3,&DWP(12,$key)); | 950 | &xor ($s3,&DWP(12,$key)); |
951 | 951 | ||
952 | &ret (); | 952 | &ret (); |
953 | &function_end_B("_x86_AES_encrypt"); | ||
953 | 954 | ||
954 | &set_label("AES_Te",64); # Yes! I keep it in the code segment! | 955 | &rodataseg(); |
956 | &set_label("AES_Te",64); | ||
955 | &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6); | 957 | &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6); |
956 | &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591); | 958 | &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591); |
957 | &_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56); | 959 | &_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56); |
@@ -1154,7 +1156,7 @@ sub enclast() | |||
1154 | &data_word(0x00000010, 0x00000020, 0x00000040, 0x00000080); | 1156 | &data_word(0x00000010, 0x00000020, 0x00000040, 0x00000080); |
1155 | &data_word(0x0000001b, 0x00000036, 0x00000000, 0x00000000); | 1157 | &data_word(0x0000001b, 0x00000036, 0x00000000, 0x00000000); |
1156 | &data_word(0x00000000, 0x00000000, 0x00000000, 0x00000000); | 1158 | &data_word(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
1157 | &function_end_B("_x86_AES_encrypt"); | 1159 | &previous(); |
1158 | 1160 | ||
1159 | # void AES_encrypt (const void *inp,void *out,const AES_KEY *key); | 1161 | # void AES_encrypt (const void *inp,void *out,const AES_KEY *key); |
1160 | &function_begin("AES_encrypt"); | 1162 | &function_begin("AES_encrypt"); |
@@ -1174,11 +1176,9 @@ sub enclast() | |||
1174 | &add ("esp",4); # 4 is reserved for caller's return address | 1176 | &add ("esp",4); # 4 is reserved for caller's return address |
1175 | &mov ($_esp,$s0); # save stack pointer | 1177 | &mov ($_esp,$s0); # save stack pointer |
1176 | 1178 | ||
1177 | &call (&label("pic_point")); # make it PIC! | 1179 | &picsetup($tbl); |
1178 | &set_label("pic_point"); | 1180 | &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl); |
1179 | &blindpop($tbl); | 1181 | &picsymbol($tbl, &label("AES_Te"), $tbl); |
1180 | &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if (!$x86only); | ||
1181 | &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl)); | ||
1182 | 1182 | ||
1183 | # pick Te4 copy which can't "overlap" with stack frame or key schedule | 1183 | # pick Te4 copy which can't "overlap" with stack frame or key schedule |
1184 | &lea ($s1,&DWP(768-4,"esp")); | 1184 | &lea ($s1,&DWP(768-4,"esp")); |
@@ -1744,8 +1744,10 @@ sub declast() | |||
1744 | &xor ($s3,&DWP(12,$key)); | 1744 | &xor ($s3,&DWP(12,$key)); |
1745 | 1745 | ||
1746 | &ret (); | 1746 | &ret (); |
1747 | &function_end_B("_x86_AES_decrypt"); | ||
1747 | 1748 | ||
1748 | &set_label("AES_Td",64); # Yes! I keep it in the code segment! | 1749 | &rodataseg(); |
1750 | &set_label("AES_Td",64); | ||
1749 | &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a); | 1751 | &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a); |
1750 | &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b); | 1752 | &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b); |
1751 | &_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5); | 1753 | &_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5); |
@@ -1943,7 +1945,7 @@ sub declast() | |||
1943 | &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61); | 1945 | &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61); |
1944 | &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26); | 1946 | &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26); |
1945 | &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d); | 1947 | &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d); |
1946 | &function_end_B("_x86_AES_decrypt"); | 1948 | &previous(); |
1947 | 1949 | ||
1948 | # void AES_decrypt (const void *inp,void *out,const AES_KEY *key); | 1950 | # void AES_decrypt (const void *inp,void *out,const AES_KEY *key); |
1949 | &function_begin("AES_decrypt"); | 1951 | &function_begin("AES_decrypt"); |
@@ -1963,11 +1965,9 @@ sub declast() | |||
1963 | &add ("esp",4); # 4 is reserved for caller's return address | 1965 | &add ("esp",4); # 4 is reserved for caller's return address |
1964 | &mov ($_esp,$s0); # save stack pointer | 1966 | &mov ($_esp,$s0); # save stack pointer |
1965 | 1967 | ||
1966 | &call (&label("pic_point")); # make it PIC! | 1968 | &picsetup($tbl); |
1967 | &set_label("pic_point"); | 1969 | &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl); |
1968 | &blindpop($tbl); | 1970 | &picsymbol($tbl, &label("AES_Td"), $tbl); |
1969 | &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if(!$x86only); | ||
1970 | &lea ($tbl,&DWP(&label("AES_Td")."-".&label("pic_point"),$tbl)); | ||
1971 | 1971 | ||
1972 | # pick Td4 copy which can't "overlap" with stack frame or key schedule | 1972 | # pick Td4 copy which can't "overlap" with stack frame or key schedule |
1973 | &lea ($s1,&DWP(768-4,"esp")); | 1973 | &lea ($s1,&DWP(768-4,"esp")); |
@@ -2034,13 +2034,10 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds | |||
2034 | &cmp ($s2,0); | 2034 | &cmp ($s2,0); |
2035 | &je (&label("drop_out")); | 2035 | &je (&label("drop_out")); |
2036 | 2036 | ||
2037 | &call (&label("pic_point")); # make it PIC! | 2037 | &picsetup($tbl); |
2038 | &set_label("pic_point"); | 2038 | &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl); |
2039 | &blindpop($tbl); | 2039 | &picsymbol($tbl, &label("AES_Te"), $tbl); |
2040 | &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if(!$x86only); | ||
2041 | |||
2042 | &cmp (&wparam(5),0); | 2040 | &cmp (&wparam(5),0); |
2043 | &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl)); | ||
2044 | &jne (&label("picked_te")); | 2041 | &jne (&label("picked_te")); |
2045 | &lea ($tbl,&DWP(&label("AES_Td")."-".&label("AES_Te"),$tbl)); | 2042 | &lea ($tbl,&DWP(&label("AES_Td")."-".&label("AES_Te"),$tbl)); |
2046 | &set_label("picked_te"); | 2043 | &set_label("picked_te"); |
@@ -2659,10 +2656,9 @@ sub enckey() | |||
2659 | &test ("edi",-1); | 2656 | &test ("edi",-1); |
2660 | &jz (&label("badpointer")); | 2657 | &jz (&label("badpointer")); |
2661 | 2658 | ||
2662 | &call (&label("pic_point")); | 2659 | &picsetup($tbl); |
2663 | &set_label("pic_point"); | 2660 | &picsymbol($tbl, &label("AES_Te"), $tbl); |
2664 | &blindpop($tbl); | 2661 | |
2665 | &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl)); | ||
2666 | &lea ($tbl,&DWP(2048+128,$tbl)); | 2662 | &lea ($tbl,&DWP(2048+128,$tbl)); |
2667 | 2663 | ||
2668 | # prefetch Te4 | 2664 | # prefetch Te4 |
@@ -2975,6 +2971,5 @@ sub deckey() | |||
2975 | 2971 | ||
2976 | &xor ("eax","eax"); # return success | 2972 | &xor ("eax","eax"); # return success |
2977 | &function_end("AES_set_decrypt_key"); | 2973 | &function_end("AES_set_decrypt_key"); |
2978 | &asciz("AES for x86, CRYPTOGAMS by <appro\@openssl.org>"); | ||
2979 | 2974 | ||
2980 | &asm_finish(); | 2975 | &asm_finish(); |
diff --git a/src/lib/libcrypto/aes/asm/aesni-x86.pl b/src/lib/libcrypto/aes/asm/aesni-x86.pl index 8c1d0b5bed..ff44415611 100644 --- a/src/lib/libcrypto/aes/asm/aesni-x86.pl +++ b/src/lib/libcrypto/aes/asm/aesni-x86.pl | |||
@@ -2184,6 +2184,5 @@ if ($PREFIX eq "aesni") { | |||
2184 | &set_label("dec_key_ret"); | 2184 | &set_label("dec_key_ret"); |
2185 | &ret (); | 2185 | &ret (); |
2186 | &function_end_B("${PREFIX}_set_decrypt_key"); | 2186 | &function_end_B("${PREFIX}_set_decrypt_key"); |
2187 | &asciz("AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>"); | ||
2188 | 2187 | ||
2189 | &asm_finish(); | 2188 | &asm_finish(); |
diff --git a/src/lib/libcrypto/aes/asm/vpaes-x86.pl b/src/lib/libcrypto/aes/asm/vpaes-x86.pl index 1533e2c304..38cef61733 100644 --- a/src/lib/libcrypto/aes/asm/vpaes-x86.pl +++ b/src/lib/libcrypto/aes/asm/vpaes-x86.pl | |||
@@ -57,6 +57,7 @@ $PREFIX="vpaes"; | |||
57 | my ($round, $base, $magic, $key, $const, $inp, $out)= | 57 | my ($round, $base, $magic, $key, $const, $inp, $out)= |
58 | ("eax", "ebx", "ecx", "edx","ebp", "esi","edi"); | 58 | ("eax", "ebx", "ecx", "edx","ebp", "esi","edi"); |
59 | 59 | ||
60 | &rodataseg(); | ||
60 | &static_label("_vpaes_consts"); | 61 | &static_label("_vpaes_consts"); |
61 | &static_label("_vpaes_schedule_low_round"); | 62 | &static_label("_vpaes_schedule_low_round"); |
62 | 63 | ||
@@ -153,8 +154,7 @@ $k_dsbe=0x2a0; # decryption sbox output *E*u, *E*t | |||
153 | $k_dsbo=0x2c0; # decryption sbox final output | 154 | $k_dsbo=0x2c0; # decryption sbox final output |
154 | &data_word(0x7EF94000,0x1387EA53,0xD4943E2D,0xC7AA6DB9); | 155 | &data_word(0x7EF94000,0x1387EA53,0xD4943E2D,0xC7AA6DB9); |
155 | &data_word(0x93441D00,0x12D7560F,0xD8C58E9C,0xCA4B8159); | 156 | &data_word(0x93441D00,0x12D7560F,0xD8C58E9C,0xCA4B8159); |
156 | &asciz ("Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)"); | 157 | &previous(); |
157 | &align (64); | ||
158 | 158 | ||
159 | &function_begin_B("_vpaes_preheat"); | 159 | &function_begin_B("_vpaes_preheat"); |
160 | &add ($const,&DWP(0,"esp")); | 160 | &add ($const,&DWP(0,"esp")); |
@@ -762,9 +762,11 @@ $k_dsbo=0x2c0; # decryption sbox final output | |||
762 | &mov ($magic,0x30); | 762 | &mov ($magic,0x30); |
763 | &mov ($out,0); | 763 | &mov ($out,0); |
764 | 764 | ||
765 | &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); | 765 | &picsetup($const); |
766 | &picsymbol($const, &label("_vpaes_consts"), $const); | ||
767 | &lea ($const,&DWP(0x30,$const)) | ||
768 | |||
766 | &call ("_vpaes_schedule_core"); | 769 | &call ("_vpaes_schedule_core"); |
767 | &set_label("pic_point"); | ||
768 | 770 | ||
769 | &mov ("esp",&DWP(48,"esp")); | 771 | &mov ("esp",&DWP(48,"esp")); |
770 | &xor ("eax","eax"); | 772 | &xor ("eax","eax"); |
@@ -792,18 +794,22 @@ $k_dsbo=0x2c0; # decryption sbox final output | |||
792 | &and ($magic,32); | 794 | &and ($magic,32); |
793 | &xor ($magic,32); # nbist==192?0:32; | 795 | &xor ($magic,32); # nbist==192?0:32; |
794 | 796 | ||
795 | &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); | 797 | &picsetup($const); |
798 | &picsymbol($const, &label("_vpaes_consts"), $const); | ||
799 | &lea ($const,&DWP(0x30,$const)) | ||
800 | |||
796 | &call ("_vpaes_schedule_core"); | 801 | &call ("_vpaes_schedule_core"); |
797 | &set_label("pic_point"); | ||
798 | 802 | ||
799 | &mov ("esp",&DWP(48,"esp")); | 803 | &mov ("esp",&DWP(48,"esp")); |
800 | &xor ("eax","eax"); | 804 | &xor ("eax","eax"); |
801 | &function_end("${PREFIX}_set_decrypt_key"); | 805 | &function_end("${PREFIX}_set_decrypt_key"); |
802 | 806 | ||
803 | &function_begin("${PREFIX}_encrypt"); | 807 | &function_begin("${PREFIX}_encrypt"); |
804 | &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); | 808 | &picsetup($const); |
809 | &picsymbol($const, &label("_vpaes_consts"), $const); | ||
810 | &lea ($const,&DWP(0x30,$const)) | ||
811 | |||
805 | &call ("_vpaes_preheat"); | 812 | &call ("_vpaes_preheat"); |
806 | &set_label("pic_point"); | ||
807 | &mov ($inp,&wparam(0)); # inp | 813 | &mov ($inp,&wparam(0)); # inp |
808 | &lea ($base,&DWP(-56,"esp")); | 814 | &lea ($base,&DWP(-56,"esp")); |
809 | &mov ($out,&wparam(1)); # out | 815 | &mov ($out,&wparam(1)); # out |
@@ -820,9 +826,11 @@ $k_dsbo=0x2c0; # decryption sbox final output | |||
820 | &function_end("${PREFIX}_encrypt"); | 826 | &function_end("${PREFIX}_encrypt"); |
821 | 827 | ||
822 | &function_begin("${PREFIX}_decrypt"); | 828 | &function_begin("${PREFIX}_decrypt"); |
823 | &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); | 829 | &picsetup($const); |
830 | &picsymbol($const, &label("_vpaes_consts"), $const); | ||
831 | &lea ($const,&DWP(0x30,$const)) | ||
832 | |||
824 | &call ("_vpaes_preheat"); | 833 | &call ("_vpaes_preheat"); |
825 | &set_label("pic_point"); | ||
826 | &mov ($inp,&wparam(0)); # inp | 834 | &mov ($inp,&wparam(0)); # inp |
827 | &lea ($base,&DWP(-56,"esp")); | 835 | &lea ($base,&DWP(-56,"esp")); |
828 | &mov ($out,&wparam(1)); # out | 836 | &mov ($out,&wparam(1)); # out |
@@ -859,9 +867,11 @@ $k_dsbo=0x2c0; # decryption sbox final output | |||
859 | &mov (&DWP(8,"esp"),$const); # save ivp | 867 | &mov (&DWP(8,"esp"),$const); # save ivp |
860 | &mov ($out,$round); # $out works as $len | 868 | &mov ($out,$round); # $out works as $len |
861 | 869 | ||
862 | &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); | 870 | &picsetup($const); |
871 | &picsymbol($const, &label("_vpaes_consts"), $const); | ||
872 | &lea ($const,&DWP(0x30,$const)) | ||
873 | |||
863 | &call ("_vpaes_preheat"); | 874 | &call ("_vpaes_preheat"); |
864 | &set_label("pic_point"); | ||
865 | &cmp ($magic,0); | 875 | &cmp ($magic,0); |
866 | &je (&label("cbc_dec_loop")); | 876 | &je (&label("cbc_dec_loop")); |
867 | &jmp (&label("cbc_enc_loop")); | 877 | &jmp (&label("cbc_enc_loop")); |
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl index c4e2baa6c5..b502fe60ee 100644 --- a/src/lib/libcrypto/bn/asm/bn-586.pl +++ b/src/lib/libcrypto/bn/asm/bn-586.pl | |||
@@ -32,7 +32,8 @@ sub bn_mul_add_words | |||
32 | $c="ecx"; | 32 | $c="ecx"; |
33 | 33 | ||
34 | if ($sse2) { | 34 | if ($sse2) { |
35 | &picmeup("eax","OPENSSL_ia32cap_P"); | 35 | &picsetup("eax"); |
36 | &picsymbol("eax", "OPENSSL_ia32cap_P", "eax"); | ||
36 | &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); | 37 | &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); |
37 | &jnc(&label("maw_non_sse2")); | 38 | &jnc(&label("maw_non_sse2")); |
38 | 39 | ||
@@ -218,7 +219,8 @@ sub bn_mul_words | |||
218 | $c="ecx"; | 219 | $c="ecx"; |
219 | 220 | ||
220 | if ($sse2) { | 221 | if ($sse2) { |
221 | &picmeup("eax","OPENSSL_ia32cap_P"); | 222 | &picsetup("eax"); |
223 | &picsymbol("eax", "OPENSSL_ia32cap_P", "eax"); | ||
222 | &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); | 224 | &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); |
223 | &jnc(&label("mw_non_sse2")); | 225 | &jnc(&label("mw_non_sse2")); |
224 | 226 | ||
@@ -329,7 +331,8 @@ sub bn_sqr_words | |||
329 | $c="ecx"; | 331 | $c="ecx"; |
330 | 332 | ||
331 | if ($sse2) { | 333 | if ($sse2) { |
332 | &picmeup("eax","OPENSSL_ia32cap_P"); | 334 | &picsetup("eax"); |
335 | &picsymbol("eax", "OPENSSL_ia32cap_P", "eax"); | ||
333 | &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); | 336 | &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); |
334 | &jnc(&label("sqr_non_sse2")); | 337 | &jnc(&label("sqr_non_sse2")); |
335 | 338 | ||
diff --git a/src/lib/libcrypto/bn/asm/x86-gf2m.pl b/src/lib/libcrypto/bn/asm/x86-gf2m.pl index 9715b2158f..cb2f2a5c30 100644 --- a/src/lib/libcrypto/bn/asm/x86-gf2m.pl +++ b/src/lib/libcrypto/bn/asm/x86-gf2m.pl | |||
@@ -200,7 +200,8 @@ $R="mm0"; | |||
200 | # void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0); | 200 | # void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0); |
201 | &function_begin_B("bn_GF2m_mul_2x2"); | 201 | &function_begin_B("bn_GF2m_mul_2x2"); |
202 | if (!$x86only) { | 202 | if (!$x86only) { |
203 | &picmeup("edx","OPENSSL_ia32cap_P"); | 203 | &picsetup("edx"); |
204 | &picsymbol("edx", "OPENSSL_ia32cap_P", "edx"); | ||
204 | &mov ("eax",&DWP(0,"edx")); | 205 | &mov ("eax",&DWP(0,"edx")); |
205 | &mov ("edx",&DWP(4,"edx")); | 206 | &mov ("edx",&DWP(4,"edx")); |
206 | &test ("eax","\$IA32CAP_MASK0_MMX"); # check MMX bit | 207 | &test ("eax","\$IA32CAP_MASK0_MMX"); # check MMX bit |
@@ -308,6 +309,4 @@ if ($sse2) { | |||
308 | &ret (); | 309 | &ret (); |
309 | &function_end_B("bn_GF2m_mul_2x2"); | 310 | &function_end_B("bn_GF2m_mul_2x2"); |
310 | 311 | ||
311 | &asciz ("GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>"); | ||
312 | |||
313 | &asm_finish(); | 312 | &asm_finish(); |
diff --git a/src/lib/libcrypto/bn/asm/x86-mont.pl b/src/lib/libcrypto/bn/asm/x86-mont.pl index e6c04739b1..6524651748 100755 --- a/src/lib/libcrypto/bn/asm/x86-mont.pl +++ b/src/lib/libcrypto/bn/asm/x86-mont.pl | |||
@@ -113,7 +113,8 @@ $mul1="mm5"; | |||
113 | $temp="mm6"; | 113 | $temp="mm6"; |
114 | $mask="mm7"; | 114 | $mask="mm7"; |
115 | 115 | ||
116 | &picmeup("eax","OPENSSL_ia32cap_P"); | 116 | &picsetup("eax"); |
117 | &picsymbol("eax", "OPENSSL_ia32cap_P", "eax"); | ||
117 | &bt (&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); | 118 | &bt (&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); |
118 | &jnc (&label("non_sse2")); | 119 | &jnc (&label("non_sse2")); |
119 | 120 | ||
@@ -588,6 +589,4 @@ $sbit=$num; | |||
588 | &set_label("just_leave"); | 589 | &set_label("just_leave"); |
589 | &function_end("bn_mul_mont"); | 590 | &function_end("bn_mul_mont"); |
590 | 591 | ||
591 | &asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>"); | ||
592 | |||
593 | &asm_finish(); | 592 | &asm_finish(); |
diff --git a/src/lib/libcrypto/camellia/asm/cmll-x86.pl b/src/lib/libcrypto/camellia/asm/cmll-x86.pl index 027302ac86..a4ab11e54d 100644 --- a/src/lib/libcrypto/camellia/asm/cmll-x86.pl +++ b/src/lib/libcrypto/camellia/asm/cmll-x86.pl | |||
@@ -141,10 +141,8 @@ my $t0=@T[($j)%4],$t1=@T[($j+1)%4],$t2=@T[($j+2)%4],$t3=@T[($j+3)%4]; | |||
141 | &mov ($_esp,"ebx"); # save %esp | 141 | &mov ($_esp,"ebx"); # save %esp |
142 | &mov ($_end,"eax"); # save keyEnd | 142 | &mov ($_end,"eax"); # save keyEnd |
143 | 143 | ||
144 | &call (&label("pic_point")); | 144 | &picsetup($Tbl); |
145 | &set_label("pic_point"); | 145 | &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl); |
146 | &blindpop($Tbl); | ||
147 | &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); | ||
148 | 146 | ||
149 | &mov (@T[0],&DWP(0,$idx)); # load plaintext | 147 | &mov (@T[0],&DWP(0,$idx)); # load plaintext |
150 | &mov (@T[1],&DWP(4,$idx)); | 148 | &mov (@T[1],&DWP(4,$idx)); |
@@ -206,10 +204,8 @@ if ($OPENSSL) { | |||
206 | &mov ($_esp,"ebx"); # save %esp | 204 | &mov ($_esp,"ebx"); # save %esp |
207 | &mov ($_end,"eax"); # save keyEnd | 205 | &mov ($_end,"eax"); # save keyEnd |
208 | 206 | ||
209 | &call (&label("pic_point")); | 207 | &picsetup($Tbl); |
210 | &set_label("pic_point"); | 208 | &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl); |
211 | &blindpop($Tbl); | ||
212 | &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); | ||
213 | 209 | ||
214 | &mov (@T[0],&DWP(0,$idx)); # load plaintext | 210 | &mov (@T[0],&DWP(0,$idx)); # load plaintext |
215 | &mov (@T[1],&DWP(4,$idx)); | 211 | &mov (@T[1],&DWP(4,$idx)); |
@@ -316,10 +312,8 @@ if ($OPENSSL) { | |||
316 | &lea ($key,&DWP(0,$key,"eax")); | 312 | &lea ($key,&DWP(0,$key,"eax")); |
317 | &mov (&DWP(5*4,"esp"),"ebx");# save %esp | 313 | &mov (&DWP(5*4,"esp"),"ebx");# save %esp |
318 | 314 | ||
319 | &call (&label("pic_point")); | 315 | &picsetup($Tbl); |
320 | &set_label("pic_point"); | 316 | &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl); |
321 | &blindpop($Tbl); | ||
322 | &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); | ||
323 | 317 | ||
324 | &mov (@T[0],&DWP(0,$idx)); # load ciphertext | 318 | &mov (@T[0],&DWP(0,$idx)); # load ciphertext |
325 | &mov (@T[1],&DWP(4,$idx)); | 319 | &mov (@T[1],&DWP(4,$idx)); |
@@ -381,10 +375,8 @@ if ($OPENSSL) { | |||
381 | &lea ($key,&DWP(0,$key,"eax")); | 375 | &lea ($key,&DWP(0,$key,"eax")); |
382 | &mov (&DWP(5*4,"esp"),"ebx");# save %esp | 376 | &mov (&DWP(5*4,"esp"),"ebx");# save %esp |
383 | 377 | ||
384 | &call (&label("pic_point")); | 378 | &picsetup($Tbl); |
385 | &set_label("pic_point"); | 379 | &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl); |
386 | &blindpop($Tbl); | ||
387 | &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); | ||
388 | 380 | ||
389 | &mov (@T[0],&DWP(0,$idx)); # load ciphertext | 381 | &mov (@T[0],&DWP(0,$idx)); # load ciphertext |
390 | &mov (@T[1],&DWP(4,$idx)); | 382 | &mov (@T[1],&DWP(4,$idx)); |
@@ -594,10 +586,8 @@ my $bias=int(@T[0])?shift(@T):0; | |||
594 | &xor (@T[3],&DWP(1*8+4,$key)); | 586 | &xor (@T[3],&DWP(1*8+4,$key)); |
595 | 587 | ||
596 | &set_label("1st128",4); | 588 | &set_label("1st128",4); |
597 | &call (&label("pic_point")); | 589 | &picsetup($Tbl); |
598 | &set_label("pic_point"); | 590 | &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl); |
599 | &blindpop($Tbl); | ||
600 | &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); | ||
601 | &lea ($key,&DWP(&label("Camellia_SIGMA")."-".&label("Camellia_SBOX"),$Tbl)); | 591 | &lea ($key,&DWP(&label("Camellia_SIGMA")."-".&label("Camellia_SBOX"),$Tbl)); |
602 | 592 | ||
603 | &mov ($idx,&DWP($step*8,$key)); # prefetch SIGMA[0] | 593 | &mov ($idx,&DWP($step*8,$key)); # prefetch SIGMA[0] |
@@ -786,6 +776,7 @@ sub S4404 { my $i=shift; $i=($i<<1|$i>>7)&0xff; $i=@SBOX[$i]; return $i<<24|$i<< | |||
786 | sub S0222 { my $i=shift; $i=@SBOX[$i]; $i=($i<<1|$i>>7)&0xff; return $i<<16|$i<<8|$i; } | 776 | sub S0222 { my $i=shift; $i=@SBOX[$i]; $i=($i<<1|$i>>7)&0xff; return $i<<16|$i<<8|$i; } |
787 | sub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; return $i<<24|$i<<8|$i; } | 777 | sub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; return $i<<24|$i<<8|$i; } |
788 | 778 | ||
779 | &rodataseg(); | ||
789 | &set_label("Camellia_SIGMA",64); | 780 | &set_label("Camellia_SIGMA",64); |
790 | &data_word( | 781 | &data_word( |
791 | 0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2, | 782 | 0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2, |
@@ -796,6 +787,7 @@ sub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; return $i<<24|$i<< | |||
796 | # tables are interleaved, remember? | 787 | # tables are interleaved, remember? |
797 | for ($i=0;$i<256;$i++) { &data_word(&S1110($i),&S4404($i)); } | 788 | for ($i=0;$i<256;$i++) { &data_word(&S1110($i),&S4404($i)); } |
798 | for ($i=0;$i<256;$i++) { &data_word(&S0222($i),&S3033($i)); } | 789 | for ($i=0;$i<256;$i++) { &data_word(&S0222($i),&S3033($i)); } |
790 | &previous(); | ||
799 | 791 | ||
800 | # void Camellia_cbc_encrypt (const void char *inp, unsigned char *out, | 792 | # void Camellia_cbc_encrypt (const void char *inp, unsigned char *out, |
801 | # size_t length, const CAMELLIA_KEY *key, | 793 | # size_t length, const CAMELLIA_KEY *key, |
@@ -856,10 +848,8 @@ my ($s0,$s1,$s2,$s3) = @T; | |||
856 | &mov ($_key,$s3); # save copy of key | 848 | &mov ($_key,$s3); # save copy of key |
857 | &mov ($_ivp,$Tbl); # save copy of ivp | 849 | &mov ($_ivp,$Tbl); # save copy of ivp |
858 | 850 | ||
859 | &call (&label("pic_point")); # make it PIC! | 851 | &picsetup($Tbl); |
860 | &set_label("pic_point"); | 852 | &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl); |
861 | &blindpop($Tbl); | ||
862 | &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); | ||
863 | 853 | ||
864 | &mov ($idx,32); | 854 | &mov ($idx,32); |
865 | &set_label("prefetch_sbox",4); | 855 | &set_label("prefetch_sbox",4); |
@@ -1133,6 +1123,4 @@ my ($s0,$s1,$s2,$s3) = @T; | |||
1133 | &function_end("Camellia_cbc_encrypt"); | 1123 | &function_end("Camellia_cbc_encrypt"); |
1134 | } | 1124 | } |
1135 | 1125 | ||
1136 | &asciz("Camellia for x86 by <appro\@openssl.org>"); | ||
1137 | |||
1138 | &asm_finish(); | 1126 | &asm_finish(); |
diff --git a/src/lib/libcrypto/des/asm/des-586.pl b/src/lib/libcrypto/des/asm/des-586.pl index 5b5f39cebd..e11b2ef80f 100644 --- a/src/lib/libcrypto/des/asm/des-586.pl +++ b/src/lib/libcrypto/des/asm/des-586.pl | |||
@@ -154,11 +154,8 @@ sub DES_encrypt | |||
154 | &rotl($L,3); | 154 | &rotl($L,3); |
155 | } | 155 | } |
156 | 156 | ||
157 | # PIC-ification:-) | 157 | &picsetup($trans); |
158 | &call (&label("pic_point")); | 158 | &picsymbol($trans, &label("DES_SPtrans"), $trans); |
159 | &set_label("pic_point"); | ||
160 | &blindpop($trans); | ||
161 | &lea ($trans,&DWP(&label("DES_SPtrans")."-".&label("pic_point"),$trans)); | ||
162 | 159 | ||
163 | &mov( "ecx", &wparam(1) ); | 160 | &mov( "ecx", &wparam(1) ); |
164 | 161 | ||
@@ -314,6 +311,7 @@ sub FP_new | |||
314 | 311 | ||
315 | sub DES_SPtrans | 312 | sub DES_SPtrans |
316 | { | 313 | { |
314 | &rodataseg(); | ||
317 | &set_label("DES_SPtrans",64); | 315 | &set_label("DES_SPtrans",64); |
318 | &data_word(0x02080800, 0x00080000, 0x02000002, 0x02080802); | 316 | &data_word(0x02080800, 0x00080000, 0x02000002, 0x02080802); |
319 | &data_word(0x02000000, 0x00080802, 0x00080002, 0x02000002); | 317 | &data_word(0x02000000, 0x00080802, 0x00080002, 0x02000002); |
@@ -450,4 +448,5 @@ sub DES_SPtrans | |||
450 | &data_word(0x00820000, 0x00020080, 0x20020080, 0x20800000); | 448 | &data_word(0x00820000, 0x00020080, 0x20020080, 0x20800000); |
451 | &data_word(0x00000080, 0x20820000, 0x00820080, 0x00000000); | 449 | &data_word(0x00000080, 0x20820000, 0x00820080, 0x00000000); |
452 | &data_word(0x20000000, 0x20800080, 0x00020000, 0x00820080); | 450 | &data_word(0x20000000, 0x20800080, 0x00020000, 0x00820080); |
451 | &previous(); | ||
453 | } | 452 | } |
diff --git a/src/lib/libcrypto/modes/asm/ghash-x86.pl b/src/lib/libcrypto/modes/asm/ghash-x86.pl index 27492597ad..5e868a43ff 100644 --- a/src/lib/libcrypto/modes/asm/ghash-x86.pl +++ b/src/lib/libcrypto/modes/asm/ghash-x86.pl | |||
@@ -411,10 +411,8 @@ $S=12; # shift factor for rem_4bit | |||
411 | &mov ($inp,&wparam(0)); # load Xi | 411 | &mov ($inp,&wparam(0)); # load Xi |
412 | &mov ($Htbl,&wparam(1)); # load Htable | 412 | &mov ($Htbl,&wparam(1)); # load Htable |
413 | 413 | ||
414 | &call (&label("pic_point")); | 414 | &picsetup("eax"); |
415 | &set_label("pic_point"); | 415 | &picsymbol("eax", &label("rem_4bit"), "eax"); |
416 | &blindpop("eax"); | ||
417 | &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax")); | ||
418 | 416 | ||
419 | &movz ($Zll,&BP(15,$inp)); | 417 | &movz ($Zll,&BP(15,$inp)); |
420 | 418 | ||
@@ -436,10 +434,8 @@ $S=12; # shift factor for rem_4bit | |||
436 | &mov ($inp,&wparam(2)); # load in | 434 | &mov ($inp,&wparam(2)); # load in |
437 | &mov ($Zlh,&wparam(3)); # load len | 435 | &mov ($Zlh,&wparam(3)); # load len |
438 | 436 | ||
439 | &call (&label("pic_point")); | 437 | &picsetup("eax"); |
440 | &set_label("pic_point"); | 438 | &picsymbol("eax", &label("rem_4bit"), "eax"); |
441 | &blindpop("eax"); | ||
442 | &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax")); | ||
443 | 439 | ||
444 | &add ($Zlh,$inp); | 440 | &add ($Zlh,$inp); |
445 | &mov (&wparam(3),$Zlh); # len to point at the end of input | 441 | &mov (&wparam(3),$Zlh); # len to point at the end of input |
@@ -584,10 +580,8 @@ sub mmx_loop() { | |||
584 | &mov ($inp,&wparam(0)); # load Xi | 580 | &mov ($inp,&wparam(0)); # load Xi |
585 | &mov ($Htbl,&wparam(1)); # load Htable | 581 | &mov ($Htbl,&wparam(1)); # load Htable |
586 | 582 | ||
587 | &call (&label("pic_point")); | 583 | &picsetup("eax"); |
588 | &set_label("pic_point"); | 584 | &picsymbol("eax", &label("rem_4bit"), "eax"); |
589 | &blindpop("eax"); | ||
590 | &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax")); | ||
591 | 585 | ||
592 | &movz ($Zll,&BP(15,$inp)); | 586 | &movz ($Zll,&BP(15,$inp)); |
593 | 587 | ||
@@ -618,10 +612,9 @@ sub mmx_loop() { | |||
618 | &mov ("ecx",&wparam(2)); # inp | 612 | &mov ("ecx",&wparam(2)); # inp |
619 | &mov ("edx",&wparam(3)); # len | 613 | &mov ("edx",&wparam(3)); # len |
620 | &mov ("ebp","esp"); # original %esp | 614 | &mov ("ebp","esp"); # original %esp |
621 | &call (&label("pic_point")); | 615 | |
622 | &set_label ("pic_point"); | 616 | &picsetup($rem_8bit); |
623 | &blindpop ($rem_8bit); | 617 | &picsymbol($rem_8bit, &label("rem_8bit"), $rem_8bit); |
624 | &lea ($rem_8bit,&DWP(&label("rem_8bit")."-".&label("pic_point"),$rem_8bit)); | ||
625 | 618 | ||
626 | &sub ("esp",512+16+16); # allocate stack frame... | 619 | &sub ("esp",512+16+16); # allocate stack frame... |
627 | &and ("esp",-64); # ...and align it | 620 | &and ("esp",-64); # ...and align it |
@@ -910,10 +903,8 @@ my ($Xhi,$Xi) = @_; | |||
910 | &mov ($Htbl,&wparam(0)); | 903 | &mov ($Htbl,&wparam(0)); |
911 | &mov ($Xip,&wparam(1)); | 904 | &mov ($Xip,&wparam(1)); |
912 | 905 | ||
913 | &call (&label("pic")); | 906 | &picsetup($const); |
914 | &set_label("pic"); | 907 | &picsymbol($const, &label("bswap"), $const); |
915 | &blindpop ($const); | ||
916 | &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); | ||
917 | 908 | ||
918 | &movdqu ($Hkey,&QWP(0,$Xip)); | 909 | &movdqu ($Hkey,&QWP(0,$Xip)); |
919 | &pshufd ($Hkey,$Hkey,0b01001110);# dword swap | 910 | &pshufd ($Hkey,$Hkey,0b01001110);# dword swap |
@@ -947,10 +938,8 @@ my ($Xhi,$Xi) = @_; | |||
947 | &mov ($Xip,&wparam(0)); | 938 | &mov ($Xip,&wparam(0)); |
948 | &mov ($Htbl,&wparam(1)); | 939 | &mov ($Htbl,&wparam(1)); |
949 | 940 | ||
950 | &call (&label("pic")); | 941 | &picsetup($const); |
951 | &set_label("pic"); | 942 | &picsymbol($const, &label("bswap"), $const); |
952 | &blindpop ($const); | ||
953 | &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); | ||
954 | 943 | ||
955 | &movdqu ($Xi,&QWP(0,$Xip)); | 944 | &movdqu ($Xi,&QWP(0,$Xip)); |
956 | &movdqa ($T3,&QWP(0,$const)); | 945 | &movdqa ($T3,&QWP(0,$const)); |
@@ -972,10 +961,8 @@ my ($Xhi,$Xi) = @_; | |||
972 | &mov ($inp,&wparam(2)); | 961 | &mov ($inp,&wparam(2)); |
973 | &mov ($len,&wparam(3)); | 962 | &mov ($len,&wparam(3)); |
974 | 963 | ||
975 | &call (&label("pic")); | 964 | &picsetup($const); |
976 | &set_label("pic"); | 965 | &picsymbol($const, &label("bswap"), $const); |
977 | &blindpop ($const); | ||
978 | &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); | ||
979 | 966 | ||
980 | &movdqu ($Xi,&QWP(0,$Xip)); | 967 | &movdqu ($Xi,&QWP(0,$Xip)); |
981 | &movdqa ($T3,&QWP(0,$const)); | 968 | &movdqa ($T3,&QWP(0,$const)); |
@@ -1138,10 +1125,8 @@ my ($Xhi,$Xi)=@_; | |||
1138 | &mov ($Htbl,&wparam(0)); | 1125 | &mov ($Htbl,&wparam(0)); |
1139 | &mov ($Xip,&wparam(1)); | 1126 | &mov ($Xip,&wparam(1)); |
1140 | 1127 | ||
1141 | &call (&label("pic")); | 1128 | &picsetup($const); |
1142 | &set_label("pic"); | 1129 | &picsymbol($const, &label("bswap"), $const); |
1143 | &blindpop ($const); | ||
1144 | &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); | ||
1145 | 1130 | ||
1146 | &movdqu ($Hkey,&QWP(0,$Xip)); | 1131 | &movdqu ($Hkey,&QWP(0,$Xip)); |
1147 | &pshufd ($Hkey,$Hkey,0b01001110);# dword swap | 1132 | &pshufd ($Hkey,$Hkey,0b01001110);# dword swap |
@@ -1161,10 +1146,8 @@ my ($Xhi,$Xi)=@_; | |||
1161 | &mov ($Xip,&wparam(0)); | 1146 | &mov ($Xip,&wparam(0)); |
1162 | &mov ($Htbl,&wparam(1)); | 1147 | &mov ($Htbl,&wparam(1)); |
1163 | 1148 | ||
1164 | &call (&label("pic")); | 1149 | &picsetup($const); |
1165 | &set_label("pic"); | 1150 | &picsymbol($const, &label("bswap"), $const); |
1166 | &blindpop ($const); | ||
1167 | &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); | ||
1168 | 1151 | ||
1169 | &movdqu ($Xi,&QWP(0,$Xip)); | 1152 | &movdqu ($Xi,&QWP(0,$Xip)); |
1170 | &movdqa ($Xn,&QWP(0,$const)); | 1153 | &movdqa ($Xn,&QWP(0,$const)); |
@@ -1186,10 +1169,8 @@ my ($Xhi,$Xi)=@_; | |||
1186 | &mov ($inp,&wparam(2)); | 1169 | &mov ($inp,&wparam(2)); |
1187 | &mov ($len,&wparam(3)); | 1170 | &mov ($len,&wparam(3)); |
1188 | 1171 | ||
1189 | &call (&label("pic")); | 1172 | &picsetup($const); |
1190 | &set_label("pic"); | 1173 | &picsymbol($const, &label("bswap"), $const); |
1191 | &blindpop ($const); | ||
1192 | &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); | ||
1193 | 1174 | ||
1194 | &movdqu ($Xi,&QWP(0,$Xip)); | 1175 | &movdqu ($Xi,&QWP(0,$Xip)); |
1195 | &movdqa ($T3,&QWP(0,$const)); | 1176 | &movdqa ($T3,&QWP(0,$const)); |
@@ -1270,11 +1251,14 @@ my ($Xhi,$Xi)=@_; | |||
1270 | 1251 | ||
1271 | } | 1252 | } |
1272 | 1253 | ||
1254 | &rodataseg(); | ||
1273 | &set_label("bswap",64); | 1255 | &set_label("bswap",64); |
1274 | &data_byte(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); | 1256 | &data_byte(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); |
1275 | &data_byte(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2); # 0x1c2_polynomial | 1257 | &data_byte(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2); # 0x1c2_polynomial |
1258 | &previous(); | ||
1276 | }} # $sse2 | 1259 | }} # $sse2 |
1277 | 1260 | ||
1261 | &rodataseg(); | ||
1278 | &set_label("rem_4bit",64); | 1262 | &set_label("rem_4bit",64); |
1279 | &data_word(0,0x0000<<$S,0,0x1C20<<$S,0,0x3840<<$S,0,0x2460<<$S); | 1263 | &data_word(0,0x0000<<$S,0,0x1C20<<$S,0,0x3840<<$S,0,0x2460<<$S); |
1280 | &data_word(0,0x7080<<$S,0,0x6CA0<<$S,0,0x48C0<<$S,0,0x54E0<<$S); | 1264 | &data_word(0,0x7080<<$S,0,0x6CA0<<$S,0,0x48C0<<$S,0,0x54E0<<$S); |
@@ -1313,9 +1297,9 @@ my ($Xhi,$Xi)=@_; | |||
1313 | &data_short(0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E); | 1297 | &data_short(0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E); |
1314 | &data_short(0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE); | 1298 | &data_short(0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE); |
1315 | &data_short(0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE); | 1299 | &data_short(0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE); |
1300 | &previous(); | ||
1316 | }}} # !$x86only | 1301 | }}} # !$x86only |
1317 | 1302 | ||
1318 | &asciz("GHASH for x86, CRYPTOGAMS by <appro\@openssl.org>"); | ||
1319 | &asm_finish(); | 1303 | &asm_finish(); |
1320 | 1304 | ||
1321 | # A question was raised about choice of vanilla MMX. Or rather why wasn't | 1305 | # A question was raised about choice of vanilla MMX. Or rather why wasn't |
diff --git a/src/lib/libcrypto/perlasm/cbc.pl b/src/lib/libcrypto/perlasm/cbc.pl index 24561e759a..392f23e145 100644 --- a/src/lib/libcrypto/perlasm/cbc.pl +++ b/src/lib/libcrypto/perlasm/cbc.pl | |||
@@ -34,6 +34,15 @@ sub cbc | |||
34 | # p1,p2,p3 are the offsets for parameters to be passed to the | 34 | # p1,p2,p3 are the offsets for parameters to be passed to the |
35 | # underlying calls. | 35 | # underlying calls. |
36 | 36 | ||
37 | &static_label("cbc_enc_jmp_table_".$name); | ||
38 | &static_label("ej1_".$name); | ||
39 | &static_label("ej2_".$name); | ||
40 | &static_label("ej3_".$name); | ||
41 | &static_label("ej4_".$name); | ||
42 | &static_label("ej5_".$name); | ||
43 | &static_label("ej6_".$name); | ||
44 | &static_label("ej7_".$name); | ||
45 | |||
37 | &function_begin_B($name,""); | 46 | &function_begin_B($name,""); |
38 | &comment(""); | 47 | &comment(""); |
39 | 48 | ||
@@ -146,33 +155,32 @@ sub cbc | |||
146 | &mov($count, &wparam(2)); # length | 155 | &mov($count, &wparam(2)); # length |
147 | &and($count, 7); | 156 | &and($count, 7); |
148 | &jz(&label("finish")); | 157 | &jz(&label("finish")); |
149 | &call(&label("PIC_point")); | 158 | |
150 | &set_label("PIC_point"); | 159 | &picsetup("edx"); |
151 | &blindpop("edx"); | 160 | &picsymbol("ecx", &label("cbc_enc_jmp_table_".$name), "edx") |
152 | &lea("ecx",&DWP(&label("cbc_enc_jmp_table")."-".&label("PIC_point"),"edx")); | ||
153 | &mov($count,&DWP(0,"ecx",$count,4)); | 161 | &mov($count,&DWP(0,"ecx",$count,4)); |
154 | &add($count,"edx"); | 162 | &picadjust($count, "edx"); |
163 | |||
155 | &xor("ecx","ecx"); | 164 | &xor("ecx","ecx"); |
156 | &xor("edx","edx"); | 165 | &xor("edx","edx"); |
157 | #&mov($count,&DWP(&label("cbc_enc_jmp_table"),"",$count,4)); | ||
158 | &jmp_ptr($count); | 166 | &jmp_ptr($count); |
159 | 167 | ||
160 | &set_label("ej7"); | 168 | &set_label("ej7_".$name); |
161 | &movb(&HB("edx"), &BP(6,$in,"",0)); | 169 | &movb(&HB("edx"), &BP(6,$in,"",0)); |
162 | &shl("edx",8); | 170 | &shl("edx",8); |
163 | &set_label("ej6"); | 171 | &set_label("ej6_".$name); |
164 | &movb(&HB("edx"), &BP(5,$in,"",0)); | 172 | &movb(&HB("edx"), &BP(5,$in,"",0)); |
165 | &set_label("ej5"); | 173 | &set_label("ej5_".$name); |
166 | &movb(&LB("edx"), &BP(4,$in,"",0)); | 174 | &movb(&LB("edx"), &BP(4,$in,"",0)); |
167 | &set_label("ej4"); | 175 | &set_label("ej4_".$name); |
168 | &mov("ecx", &DWP(0,$in,"",0)); | 176 | &mov("ecx", &DWP(0,$in,"",0)); |
169 | &jmp(&label("ejend")); | 177 | &jmp(&label("ejend")); |
170 | &set_label("ej3"); | 178 | &set_label("ej3_".$name); |
171 | &movb(&HB("ecx"), &BP(2,$in,"",0)); | 179 | &movb(&HB("ecx"), &BP(2,$in,"",0)); |
172 | &shl("ecx",8); | 180 | &shl("ecx",8); |
173 | &set_label("ej2"); | 181 | &set_label("ej2_".$name); |
174 | &movb(&HB("ecx"), &BP(1,$in,"",0)); | 182 | &movb(&HB("ecx"), &BP(1,$in,"",0)); |
175 | &set_label("ej1"); | 183 | &set_label("ej1_".$name); |
176 | &movb(&LB("ecx"), &BP(0,$in,"",0)); | 184 | &movb(&LB("ecx"), &BP(0,$in,"",0)); |
177 | &set_label("ejend"); | 185 | &set_label("ejend"); |
178 | 186 | ||
@@ -279,30 +287,14 @@ sub cbc | |||
279 | &mov("eax", &DWP(0,$in,"",0)); # get old cipher text, | 287 | &mov("eax", &DWP(0,$in,"",0)); # get old cipher text, |
280 | &mov("ebx", &DWP(4,$in,"",0)); # next iv actually | 288 | &mov("ebx", &DWP(4,$in,"",0)); # next iv actually |
281 | 289 | ||
282 | &set_label("dj7"); | ||
283 | &rotr("edx", 16); | 290 | &rotr("edx", 16); |
284 | &movb(&BP(6,$out,"",0), &LB("edx")); | 291 | &movb(&BP(6,$out,"",0), &LB("edx")); |
285 | &shr("edx",16); | 292 | &shr("edx",16); |
286 | &set_label("dj6"); | ||
287 | &movb(&BP(5,$out,"",0), &HB("edx")); | 293 | &movb(&BP(5,$out,"",0), &HB("edx")); |
288 | &set_label("dj5"); | ||
289 | &movb(&BP(4,$out,"",0), &LB("edx")); | 294 | &movb(&BP(4,$out,"",0), &LB("edx")); |
290 | &set_label("dj4"); | ||
291 | &mov(&DWP(0,$out,"",0), "ecx"); | 295 | &mov(&DWP(0,$out,"",0), "ecx"); |
292 | &jmp(&label("djend")); | ||
293 | &set_label("dj3"); | ||
294 | &rotr("ecx", 16); | ||
295 | &movb(&BP(2,$out,"",0), &LB("ecx")); | ||
296 | &shl("ecx",16); | ||
297 | &set_label("dj2"); | ||
298 | &movb(&BP(1,$in,"",0), &HB("ecx")); | ||
299 | &set_label("dj1"); | ||
300 | &movb(&BP(0,$in,"",0), &LB("ecx")); | ||
301 | &set_label("djend"); | ||
302 | 296 | ||
303 | # final iv is still in eax:ebx | 297 | # final iv is still in eax:ebx |
304 | &jmp(&label("finish")); | ||
305 | |||
306 | 298 | ||
307 | ############################ FINISH ######################## | 299 | ############################ FINISH ######################## |
308 | &set_label("finish",1); | 300 | &set_label("finish",1); |
@@ -319,31 +311,21 @@ sub cbc | |||
319 | &mov(&DWP(4,"ecx","",0), "ebx"); # save iv | 311 | &mov(&DWP(4,"ecx","",0), "ebx"); # save iv |
320 | 312 | ||
321 | &function_end_A($name); | 313 | &function_end_A($name); |
314 | &function_end_B($name); | ||
322 | 315 | ||
316 | &rodataseg(); | ||
323 | &align(64); | 317 | &align(64); |
324 | &set_label("cbc_enc_jmp_table"); | 318 | &set_label("cbc_enc_jmp_table_".$name); |
325 | &data_word("0"); | 319 | &data_word("0"); |
326 | &data_word(&label("ej1")."-".&label("PIC_point")); | 320 | &data_word(&code_sym(&label("ej1_".$name))); |
327 | &data_word(&label("ej2")."-".&label("PIC_point")); | 321 | &data_word(&code_sym(&label("ej2_".$name))); |
328 | &data_word(&label("ej3")."-".&label("PIC_point")); | 322 | &data_word(&code_sym(&label("ej3_".$name))); |
329 | &data_word(&label("ej4")."-".&label("PIC_point")); | 323 | &data_word(&code_sym(&label("ej4_".$name))); |
330 | &data_word(&label("ej5")."-".&label("PIC_point")); | 324 | &data_word(&code_sym(&label("ej5_".$name))); |
331 | &data_word(&label("ej6")."-".&label("PIC_point")); | 325 | &data_word(&code_sym(&label("ej6_".$name))); |
332 | &data_word(&label("ej7")."-".&label("PIC_point")); | 326 | &data_word(&code_sym(&label("ej7_".$name))); |
333 | # not used | 327 | &previous(); |
334 | #&set_label("cbc_dec_jmp_table",1); | ||
335 | #&data_word("0"); | ||
336 | #&data_word(&label("dj1")."-".&label("PIC_point")); | ||
337 | #&data_word(&label("dj2")."-".&label("PIC_point")); | ||
338 | #&data_word(&label("dj3")."-".&label("PIC_point")); | ||
339 | #&data_word(&label("dj4")."-".&label("PIC_point")); | ||
340 | #&data_word(&label("dj5")."-".&label("PIC_point")); | ||
341 | #&data_word(&label("dj6")."-".&label("PIC_point")); | ||
342 | #&data_word(&label("dj7")."-".&label("PIC_point")); | ||
343 | &align(64); | ||
344 | 328 | ||
345 | &function_end_B($name); | ||
346 | |||
347 | } | 329 | } |
348 | 330 | ||
349 | 1; | 331 | 1; |
diff --git a/src/lib/libcrypto/perlasm/x86gas.pl b/src/lib/libcrypto/perlasm/x86gas.pl index ca644ba553..f28a590549 100644 --- a/src/lib/libcrypto/perlasm/x86gas.pl +++ b/src/lib/libcrypto/perlasm/x86gas.pl | |||
@@ -177,34 +177,52 @@ sub ::align | |||
177 | push(@out,".align\t$val\n"); | 177 | push(@out,".align\t$val\n"); |
178 | } | 178 | } |
179 | 179 | ||
180 | sub ::picmeup | 180 | # |
181 | { my($dst,$sym,$base,$reflabel)=@_; | 181 | # PIC data access wrappers |
182 | 182 | # | |
183 | if ($::openbsd) | 183 | # Usage: |
184 | { &::emitraw("#if defined(PIC) || defined(__PIC__)"); | 184 | # picsetup($base) |
185 | &::emitraw("PIC_PROLOGUE"); | 185 | # - only allowed once per function (because of hardcoded label name), |
186 | &::mov($dst, &::DWP("PIC_GOT($sym)")); | 186 | # sets up pic access, uses $base register as temporary |
187 | &::emitraw("PIC_EPILOGUE"); | 187 | # picsymbol($dst, $sym, $base) |
188 | &::emitraw("#else /* PIC */"); | 188 | # - loads the address of symbol $sym into $dst with the help of $base |
189 | &::lea($dst,&::DWP($sym)); | 189 | # initialized by picsetup |
190 | &::emitraw("#endif /* PIC */"); | 190 | # picadjust($sym, $base) |
191 | } | 191 | # - adjusts a code pointer read from a code_sym table with the help of |
192 | elsif (($::pic && ($::elf || $::aout)) || $::macosx) | 192 | # $base initialized by picsetup |
193 | { if (!defined($base)) | 193 | # code_sym($sym) |
194 | { &::call(&::label("PIC_me_up")); | 194 | # - emits a pointer to the given code symbol, relative to the GOT if |
195 | &::set_label("PIC_me_up"); | 195 | # PIC. This pointer will need to be adjusted with picadjust above |
196 | &::blindpop($dst); | 196 | # before use. |
197 | $base=$dst; | 197 | |
198 | $reflabel=&::label("PIC_me_up"); | 198 | sub ::picsetup |
199 | } | 199 | { my($base)=@_; |
200 | |||
201 | if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx) | ||
202 | { | ||
203 | &::call(&::label("PIC_setup")); | ||
204 | &::set_label("PIC_setup"); | ||
205 | &::blindpop($base); | ||
200 | if ($::macosx) | 206 | if ($::macosx) |
201 | { my $indirect=&::static_label("$nmdecor$sym\$non_lazy_ptr"); | 207 | { my $indirect=&::static_label("$nmdecor$sym\$non_lazy_ptr"); |
202 | &::mov($dst,&::DWP("$indirect-$reflabel",$base)); | ||
203 | $non_lazy_ptr{"$nmdecor$sym"}=$indirect; | 208 | $non_lazy_ptr{"$nmdecor$sym"}=$indirect; |
204 | } | 209 | } |
210 | } | ||
211 | } | ||
212 | |||
213 | sub ::picsymbol | ||
214 | { my($dst,$sym,$base)=@_; | ||
215 | |||
216 | if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx) | ||
217 | { | ||
218 | my $reflabel=&::label("PIC_setup"); | ||
219 | if ($::macosx) | ||
220 | { my $indirect=$non_lazy_ptr{"$nmdecor$sym"}; | ||
221 | &::mov($dst,&::DWP("$indirect-$reflabel",$base)); | ||
222 | } | ||
205 | else | 223 | else |
206 | { &::lea($dst,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]", | 224 | { &::lea($dst,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]", |
207 | $base)); | 225 | $base)); |
208 | &::mov($dst,&::DWP("$sym\@GOT",$dst)); | 226 | &::mov($dst,&::DWP("$sym\@GOT",$dst)); |
209 | } | 227 | } |
210 | } | 228 | } |
@@ -212,6 +230,30 @@ sub ::picmeup | |||
212 | { &::lea($dst,&::DWP($sym)); } | 230 | { &::lea($dst,&::DWP($sym)); } |
213 | } | 231 | } |
214 | 232 | ||
233 | sub ::picadjust | ||
234 | { my($sym,$base)=@_; | ||
235 | |||
236 | if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx) | ||
237 | { | ||
238 | my $reflabel=&::label("PIC_setup"); | ||
239 | &::lea($sym,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]", | ||
240 | $base,$sym)); | ||
241 | } | ||
242 | } | ||
243 | |||
244 | sub ::code_sym | ||
245 | { my($sym)=@_; | ||
246 | |||
247 | if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx) | ||
248 | { | ||
249 | $sym."\@GOTOFF"; | ||
250 | } | ||
251 | else | ||
252 | { | ||
253 | $sym; | ||
254 | } | ||
255 | } | ||
256 | |||
215 | sub ::initseg | 257 | sub ::initseg |
216 | { my $f=$nmdecor.shift; | 258 | { my $f=$nmdecor.shift; |
217 | 259 | ||
@@ -264,4 +306,10 @@ ___ | |||
264 | sub ::dataseg | 306 | sub ::dataseg |
265 | { push(@out,".data\n"); } | 307 | { push(@out,".data\n"); } |
266 | 308 | ||
309 | sub ::rodataseg | ||
310 | { push(@out,".rodata\n"); } | ||
311 | |||
312 | sub ::previous | ||
313 | { push(@out,".previous\n"); } | ||
314 | |||
267 | 1; | 315 | 1; |
diff --git a/src/lib/libcrypto/rc4/asm/rc4-586.pl b/src/lib/libcrypto/rc4/asm/rc4-586.pl index f3c3e117bc..4991c37c2c 100644 --- a/src/lib/libcrypto/rc4/asm/rc4-586.pl +++ b/src/lib/libcrypto/rc4/asm/rc4-586.pl | |||
@@ -188,7 +188,8 @@ if ($alt=0) { | |||
188 | &mov (&wparam(3),$out); # $out as accumulator in these loops | 188 | &mov (&wparam(3),$out); # $out as accumulator in these loops |
189 | &jz (&label("go4loop4")); | 189 | &jz (&label("go4loop4")); |
190 | 190 | ||
191 | &picmeup($out,"OPENSSL_ia32cap_P"); | 191 | &picsetup($out); |
192 | &picsymbol($out, "OPENSSL_ia32cap_P", $out); | ||
192 | # check SSE2 bit [could have been MMX] | 193 | # check SSE2 bit [could have been MMX] |
193 | &bt (&DWP(0,$out),"\$IA32CAP_BIT0_SSE2"); | 194 | &bt (&DWP(0,$out),"\$IA32CAP_BIT0_SSE2"); |
194 | &jnc (&label("go4loop4")); | 195 | &jnc (&label("go4loop4")); |
@@ -305,7 +306,9 @@ $idx="edx"; | |||
305 | &mov ($out,&wparam(0)); # load key | 306 | &mov ($out,&wparam(0)); # load key |
306 | &mov ($idi,&wparam(1)); # load len | 307 | &mov ($idi,&wparam(1)); # load len |
307 | &mov ($inp,&wparam(2)); # load data | 308 | &mov ($inp,&wparam(2)); # load data |
308 | &picmeup($idx,"OPENSSL_ia32cap_P"); | 309 | |
310 | &picsetup($idx); | ||
311 | &picsymbol($idx, "OPENSSL_ia32cap_P", $idx); | ||
309 | 312 | ||
310 | &lea ($out,&DWP(2*4,$out)); # &key->data | 313 | &lea ($out,&DWP(2*4,$out)); # &key->data |
311 | &lea ($inp,&DWP(0,$inp,$idi)); # $inp to point at the end | 314 | &lea ($inp,&DWP(0,$inp,$idi)); # $inp to point at the end |
@@ -382,12 +385,12 @@ $idx="edx"; | |||
382 | &function_end("RC4_set_key"); | 385 | &function_end("RC4_set_key"); |
383 | 386 | ||
384 | # const char *RC4_options(void); | 387 | # const char *RC4_options(void); |
388 | &static_label("opts"); | ||
385 | &function_begin_B("RC4_options"); | 389 | &function_begin_B("RC4_options"); |
386 | &call (&label("pic_point")); | 390 | &picsetup("edx"); |
387 | &set_label("pic_point"); | 391 | &picsymbol("eax", &label("opts"), "edx"); |
388 | &blindpop("eax"); | 392 | &picsymbol("edx", "OPENSSL_ia32cap_P", "edx");; |
389 | &lea ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax")); | 393 | |
390 | &picmeup("edx","OPENSSL_ia32cap_P"); | ||
391 | &mov ("edx",&DWP(0,"edx")); | 394 | &mov ("edx",&DWP(0,"edx")); |
392 | &bt ("edx","\$IA32CAP_BIT0_INTELP4"); | 395 | &bt ("edx","\$IA32CAP_BIT0_INTELP4"); |
393 | &jc (&label("1xchar")); | 396 | &jc (&label("1xchar")); |
@@ -399,13 +402,14 @@ $idx="edx"; | |||
399 | &add ("eax",12); | 402 | &add ("eax",12); |
400 | &set_label("ret"); | 403 | &set_label("ret"); |
401 | &ret (); | 404 | &ret (); |
402 | &set_label("opts",64); | 405 | &function_end_B("RC4_options"); |
406 | |||
407 | &rodataseg(); | ||
408 | &set_label("opts"); | ||
403 | &asciz ("rc4(4x,int)"); | 409 | &asciz ("rc4(4x,int)"); |
404 | &asciz ("rc4(1x,char)"); | 410 | &asciz ("rc4(1x,char)"); |
405 | &asciz ("rc4(8x,mmx)"); | 411 | &asciz ("rc4(8x,mmx)"); |
406 | &asciz ("RC4 for x86, CRYPTOGAMS by <appro\@openssl.org>"); | 412 | &previous(); |
407 | &align (64); | ||
408 | &function_end_B("RC4_options"); | ||
409 | 413 | ||
410 | &asm_finish(); | 414 | &asm_finish(); |
411 | 415 | ||
diff --git a/src/lib/libcrypto/sha/asm/sha1-586.pl b/src/lib/libcrypto/sha/asm/sha1-586.pl index 1de5e2650e..5928e083c1 100644 --- a/src/lib/libcrypto/sha/asm/sha1-586.pl +++ b/src/lib/libcrypto/sha/asm/sha1-586.pl | |||
@@ -295,11 +295,9 @@ if ($xmm) { | |||
295 | &static_label("avx_shortcut") if ($ymm); | 295 | &static_label("avx_shortcut") if ($ymm); |
296 | &static_label("K_XX_XX"); | 296 | &static_label("K_XX_XX"); |
297 | 297 | ||
298 | &call (&label("pic_point")); # make it PIC! | 298 | &picsetup($tmp1); |
299 | &set_label("pic_point"); | 299 | &picsymbol($T, "OPENSSL_ia32cap_P", $tmp1); |
300 | &blindpop($tmp1); | 300 | &picsymbol($tmp1, &label("K_XX_XX"), $tmp1); |
301 | &picmeup($T,"OPENSSL_ia32cap_P",$tmp1,&label("pic_point")); | ||
302 | &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); | ||
303 | 301 | ||
304 | &mov ($A,&DWP(0,$T)); | 302 | &mov ($A,&DWP(0,$T)); |
305 | &mov ($D,&DWP(4,$T)); | 303 | &mov ($D,&DWP(4,$T)); |
@@ -419,10 +417,9 @@ my $_rol=sub { &rol(@_) }; | |||
419 | my $_ror=sub { &ror(@_) }; | 417 | my $_ror=sub { &ror(@_) }; |
420 | 418 | ||
421 | &function_begin("_sha1_block_data_order_ssse3"); | 419 | &function_begin("_sha1_block_data_order_ssse3"); |
422 | &call (&label("pic_point")); # make it PIC! | 420 | &picsetup($tmp1); |
423 | &set_label("pic_point"); | 421 | &picsymbol($tmp1, &label("K_XX_XX"), $tmp1); |
424 | &blindpop($tmp1); | 422 | |
425 | &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); | ||
426 | &set_label("ssse3_shortcut"); | 423 | &set_label("ssse3_shortcut"); |
427 | 424 | ||
428 | &movdqa (@X[3],&QWP(0,$tmp1)); # K_00_19 | 425 | &movdqa (@X[3],&QWP(0,$tmp1)); # K_00_19 |
@@ -861,10 +858,9 @@ my $_rol=sub { &shld(@_[0],@_) }; | |||
861 | my $_ror=sub { &shrd(@_[0],@_) }; | 858 | my $_ror=sub { &shrd(@_[0],@_) }; |
862 | 859 | ||
863 | &function_begin("_sha1_block_data_order_avx"); | 860 | &function_begin("_sha1_block_data_order_avx"); |
864 | &call (&label("pic_point")); # make it PIC! | 861 | &picsetup($tmp1); |
865 | &set_label("pic_point"); | 862 | &picsymbol($tmp1, &label("K_XX_XX"), $tmp1); |
866 | &blindpop($tmp1); | 863 | |
867 | &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); | ||
868 | &set_label("avx_shortcut"); | 864 | &set_label("avx_shortcut"); |
869 | &vzeroall(); | 865 | &vzeroall(); |
870 | 866 | ||
@@ -1213,13 +1209,15 @@ sub Xtail_avx() | |||
1213 | &mov (&DWP(16,@T[1]),$E); | 1209 | &mov (&DWP(16,@T[1]),$E); |
1214 | &function_end("_sha1_block_data_order_avx"); | 1210 | &function_end("_sha1_block_data_order_avx"); |
1215 | } | 1211 | } |
1212 | |||
1213 | &rodataseg(); | ||
1216 | &set_label("K_XX_XX",64); | 1214 | &set_label("K_XX_XX",64); |
1217 | &data_word(0x5a827999,0x5a827999,0x5a827999,0x5a827999); # K_00_19 | 1215 | &data_word(0x5a827999,0x5a827999,0x5a827999,0x5a827999); # K_00_19 |
1218 | &data_word(0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1); # K_20_39 | 1216 | &data_word(0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1); # K_20_39 |
1219 | &data_word(0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc); # K_40_59 | 1217 | &data_word(0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc); # K_40_59 |
1220 | &data_word(0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6); # K_60_79 | 1218 | &data_word(0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6); # K_60_79 |
1221 | &data_word(0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f); # pbswap mask | 1219 | &data_word(0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f); # pbswap mask |
1220 | &previous(); | ||
1222 | } | 1221 | } |
1223 | &asciz("SHA1 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>"); | ||
1224 | 1222 | ||
1225 | &asm_finish(); | 1223 | &asm_finish(); |
diff --git a/src/lib/libcrypto/sha/asm/sha256-586.pl b/src/lib/libcrypto/sha/asm/sha256-586.pl index 928ec53123..2b05c96063 100644 --- a/src/lib/libcrypto/sha/asm/sha256-586.pl +++ b/src/lib/libcrypto/sha/asm/sha256-586.pl | |||
@@ -96,16 +96,15 @@ sub BODY_00_15() { | |||
96 | &add ($A,"esi"); # h += K256[i] | 96 | &add ($A,"esi"); # h += K256[i] |
97 | } | 97 | } |
98 | 98 | ||
99 | &static_label("K256"); | ||
99 | &function_begin("sha256_block_data_order"); | 100 | &function_begin("sha256_block_data_order"); |
100 | &mov ("esi",wparam(0)); # ctx | 101 | &mov ("esi",wparam(0)); # ctx |
101 | &mov ("edi",wparam(1)); # inp | 102 | &mov ("edi",wparam(1)); # inp |
102 | &mov ("eax",wparam(2)); # num | 103 | &mov ("eax",wparam(2)); # num |
103 | &mov ("ebx","esp"); # saved sp | 104 | &mov ("ebx","esp"); # saved sp |
104 | 105 | ||
105 | &call (&label("pic_point")); # make it PIC! | 106 | &picsetup($K256); |
106 | &set_label("pic_point"); | 107 | &picsymbol($K256, &label("K256"), $K256); |
107 | &blindpop($K256); | ||
108 | &lea ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256)); | ||
109 | 108 | ||
110 | &sub ("esp",16); | 109 | &sub ("esp",16); |
111 | &and ("esp",-64); | 110 | &and ("esp",-64); |
@@ -225,8 +224,10 @@ sub BODY_00_15() { | |||
225 | 224 | ||
226 | &mov ("esp",&DWP(12,"esp")); # restore sp | 225 | &mov ("esp",&DWP(12,"esp")); # restore sp |
227 | &function_end_A(); | 226 | &function_end_A(); |
227 | &function_end_B("sha256_block_data_order"); | ||
228 | 228 | ||
229 | &set_label("K256",64); # Yes! I keep it in the code segment! | 229 | &rodataseg(); |
230 | &set_label("K256",64); | ||
230 | &data_word(0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5); | 231 | &data_word(0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5); |
231 | &data_word(0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5); | 232 | &data_word(0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5); |
232 | &data_word(0xd807aa98,0x12835b01,0x243185be,0x550c7dc3); | 233 | &data_word(0xd807aa98,0x12835b01,0x243185be,0x550c7dc3); |
@@ -243,7 +244,6 @@ sub BODY_00_15() { | |||
243 | &data_word(0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3); | 244 | &data_word(0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3); |
244 | &data_word(0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208); | 245 | &data_word(0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208); |
245 | &data_word(0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2); | 246 | &data_word(0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2); |
246 | &function_end_B("sha256_block_data_order"); | 247 | &previous(); |
247 | &asciz("SHA256 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>"); | ||
248 | 248 | ||
249 | &asm_finish(); | 249 | &asm_finish(); |
diff --git a/src/lib/libcrypto/sha/asm/sha512-586.pl b/src/lib/libcrypto/sha/asm/sha512-586.pl index 163361ebe9..c1d0684e92 100644 --- a/src/lib/libcrypto/sha/asm/sha512-586.pl +++ b/src/lib/libcrypto/sha/asm/sha512-586.pl | |||
@@ -261,16 +261,18 @@ sub BODY_00_15_x86 { | |||
261 | } | 261 | } |
262 | 262 | ||
263 | 263 | ||
264 | &static_label("K512"); | ||
264 | &function_begin("sha512_block_data_order"); | 265 | &function_begin("sha512_block_data_order"); |
265 | &mov ("esi",wparam(0)); # ctx | 266 | &mov ("esi",wparam(0)); # ctx |
266 | &mov ("edi",wparam(1)); # inp | 267 | &mov ("edi",wparam(1)); # inp |
267 | &mov ("eax",wparam(2)); # num | 268 | &mov ("eax",wparam(2)); # num |
268 | &mov ("ebx","esp"); # saved sp | 269 | &mov ("ebx","esp"); # saved sp |
269 | 270 | ||
270 | &call (&label("pic_point")); # make it PIC! | 271 | &picsetup($K512); |
271 | &set_label("pic_point"); | 272 | if ($sse2) { |
272 | &blindpop($K512); | 273 | &picsymbol("edx", "OPENSSL_ia32cap_P", $K512); |
273 | &lea ($K512,&DWP(&label("K512")."-".&label("pic_point"),$K512)); | 274 | } |
275 | &picsymbol($K512, &label("K512"), $K512); | ||
274 | 276 | ||
275 | &sub ("esp",16); | 277 | &sub ("esp",16); |
276 | &and ("esp",-64); | 278 | &and ("esp",-64); |
@@ -283,7 +285,6 @@ sub BODY_00_15_x86 { | |||
283 | &mov (&DWP(12,"esp"),"ebx"); # saved sp | 285 | &mov (&DWP(12,"esp"),"ebx"); # saved sp |
284 | 286 | ||
285 | if ($sse2) { | 287 | if ($sse2) { |
286 | &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512")); | ||
287 | &bt (&DWP(0,"edx"),"\$IA32CAP_BIT0_SSE2"); | 288 | &bt (&DWP(0,"edx"),"\$IA32CAP_BIT0_SSE2"); |
288 | &jnc (&label("loop_x86")); | 289 | &jnc (&label("loop_x86")); |
289 | 290 | ||
@@ -556,8 +557,10 @@ if ($sse2) { | |||
556 | 557 | ||
557 | &mov ("esp",&DWP(12,"esp")); # restore sp | 558 | &mov ("esp",&DWP(12,"esp")); # restore sp |
558 | &function_end_A(); | 559 | &function_end_A(); |
560 | &function_end_B("sha512_block_data_order"); | ||
559 | 561 | ||
560 | &set_label("K512",64); # Yes! I keep it in the code segment! | 562 | &rodataseg(); |
563 | &set_label("K512",64); | ||
561 | &data_word(0xd728ae22,0x428a2f98); # u64 | 564 | &data_word(0xd728ae22,0x428a2f98); # u64 |
562 | &data_word(0x23ef65cd,0x71374491); # u64 | 565 | &data_word(0x23ef65cd,0x71374491); # u64 |
563 | &data_word(0xec4d3b2f,0xb5c0fbcf); # u64 | 566 | &data_word(0xec4d3b2f,0xb5c0fbcf); # u64 |
@@ -638,7 +641,6 @@ if ($sse2) { | |||
638 | &data_word(0xfc657e2a,0x597f299c); # u64 | 641 | &data_word(0xfc657e2a,0x597f299c); # u64 |
639 | &data_word(0x3ad6faec,0x5fcb6fab); # u64 | 642 | &data_word(0x3ad6faec,0x5fcb6fab); # u64 |
640 | &data_word(0x4a475817,0x6c44198c); # u64 | 643 | &data_word(0x4a475817,0x6c44198c); # u64 |
641 | &function_end_B("sha512_block_data_order"); | 644 | &previous(); |
642 | &asciz("SHA512 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>"); | ||
643 | 645 | ||
644 | &asm_finish(); | 646 | &asm_finish(); |
diff --git a/src/lib/libcrypto/whrlpool/asm/wp-mmx.pl b/src/lib/libcrypto/whrlpool/asm/wp-mmx.pl index 0ff8e5b612..a54d702c3f 100644 --- a/src/lib/libcrypto/whrlpool/asm/wp-mmx.pl +++ b/src/lib/libcrypto/whrlpool/asm/wp-mmx.pl | |||
@@ -77,6 +77,8 @@ sub row() | |||
77 | $tbl="ebp"; | 77 | $tbl="ebp"; |
78 | @mm=("mm0","mm1","mm2","mm3","mm4","mm5","mm6","mm7"); | 78 | @mm=("mm0","mm1","mm2","mm3","mm4","mm5","mm6","mm7"); |
79 | 79 | ||
80 | &static_label("table"); | ||
81 | |||
80 | &function_begin_B("whirlpool_block_mmx"); | 82 | &function_begin_B("whirlpool_block_mmx"); |
81 | &push ("ebp"); | 83 | &push ("ebp"); |
82 | &push ("ebx"); | 84 | &push ("ebx"); |
@@ -97,10 +99,8 @@ $tbl="ebp"; | |||
97 | &mov (&DWP(8,"ebx"),"ebp"); | 99 | &mov (&DWP(8,"ebx"),"ebp"); |
98 | &mov (&DWP(16,"ebx"),"eax"); # saved stack pointer | 100 | &mov (&DWP(16,"ebx"),"eax"); # saved stack pointer |
99 | 101 | ||
100 | &call (&label("pic_point")); | 102 | &picsetup($tbl); |
101 | &set_label("pic_point"); | 103 | &picsymbol($tbl, &label("table"), $tbl); |
102 | &blindpop($tbl); | ||
103 | &lea ($tbl,&DWP(&label("table")."-".&label("pic_point"),$tbl)); | ||
104 | 104 | ||
105 | &xor ("ecx","ecx"); | 105 | &xor ("ecx","ecx"); |
106 | &xor ("edx","edx"); | 106 | &xor ("edx","edx"); |
@@ -218,7 +218,9 @@ for($i=0;$i<8;$i++) { | |||
218 | &pop ("ebx"); | 218 | &pop ("ebx"); |
219 | &pop ("ebp"); | 219 | &pop ("ebp"); |
220 | &ret (); | 220 | &ret (); |
221 | &function_end_B("whirlpool_block_mmx"); | ||
221 | 222 | ||
223 | &rodataseg(); | ||
222 | &align(64); | 224 | &align(64); |
223 | &set_label("table"); | 225 | &set_label("table"); |
224 | &LL(0x18,0x18,0x60,0x18,0xc0,0x78,0x30,0xd8); | 226 | &LL(0x18,0x18,0x60,0x18,0xc0,0x78,0x30,0xd8); |
@@ -488,6 +490,6 @@ for($i=0;$i<8;$i++) { | |||
488 | &L(0xe4,0x27,0x41,0x8b,0xa7,0x7d,0x95,0xd8); | 490 | &L(0xe4,0x27,0x41,0x8b,0xa7,0x7d,0x95,0xd8); |
489 | &L(0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e); | 491 | &L(0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e); |
490 | &L(0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33); | 492 | &L(0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33); |
493 | &previous(); | ||
491 | 494 | ||
492 | &function_end_B("whirlpool_block_mmx"); | ||
493 | &asm_finish(); | 495 | &asm_finish(); |