diff options
| author | miod <> | 2023-02-01 20:45:04 +0000 |
|---|---|---|
| committer | miod <> | 2023-02-01 20:45:04 +0000 |
| commit | dc4b2cf3e0979364f14ff89a07ffafc47435e2e7 (patch) | |
| tree | 58219f5c6538172b9d00f33bf48c9a61419ec7a0 | |
| parent | 86f42338b2994b620482c37e3d0d9fc3ba1f523b (diff) | |
| download | openbsd-dc4b2cf3e0979364f14ff89a07ffafc47435e2e7.tar.gz openbsd-dc4b2cf3e0979364f14ff89a07ffafc47435e2e7.tar.bz2 openbsd-dc4b2cf3e0979364f14ff89a07ffafc47435e2e7.zip | |
Move all data blocks from .text to .rodata, and clean up and homogenize the code
responsible for getting the proper address of those blocks.
ok tb@ jsing@
| -rw-r--r-- | src/lib/libcrypto/aes/asm/aes-586.pl | 45 | ||||
| -rw-r--r-- | src/lib/libcrypto/aes/asm/aesni-x86.pl | 1 | ||||
| -rw-r--r-- | src/lib/libcrypto/aes/asm/vpaes-x86.pl | 34 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/asm/bn-586.pl | 9 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/asm/x86-gf2m.pl | 5 | ||||
| -rwxr-xr-x | src/lib/libcrypto/bn/asm/x86-mont.pl | 5 | ||||
| -rw-r--r-- | src/lib/libcrypto/camellia/asm/cmll-x86.pl | 40 | ||||
| -rw-r--r-- | src/lib/libcrypto/des/asm/des-586.pl | 9 | ||||
| -rw-r--r-- | src/lib/libcrypto/modes/asm/ghash-x86.pl | 66 | ||||
| -rw-r--r-- | src/lib/libcrypto/perlasm/cbc.pl | 82 | ||||
| -rw-r--r-- | src/lib/libcrypto/perlasm/x86gas.pl | 92 | ||||
| -rw-r--r-- | src/lib/libcrypto/rc4/asm/rc4-586.pl | 26 | ||||
| -rw-r--r-- | src/lib/libcrypto/sha/asm/sha1-586.pl | 26 | ||||
| -rw-r--r-- | src/lib/libcrypto/sha/asm/sha256-586.pl | 14 | ||||
| -rw-r--r-- | src/lib/libcrypto/sha/asm/sha512-586.pl | 18 | ||||
| -rw-r--r-- | src/lib/libcrypto/whrlpool/asm/wp-mmx.pl | 12 |
16 files changed, 248 insertions, 236 deletions
diff --git a/src/lib/libcrypto/aes/asm/aes-586.pl b/src/lib/libcrypto/aes/asm/aes-586.pl index c5ae3f6903..4e0f34cba3 100644 --- a/src/lib/libcrypto/aes/asm/aes-586.pl +++ b/src/lib/libcrypto/aes/asm/aes-586.pl | |||
| @@ -950,8 +950,10 @@ sub enclast() | |||
| 950 | &xor ($s3,&DWP(12,$key)); | 950 | &xor ($s3,&DWP(12,$key)); |
| 951 | 951 | ||
| 952 | &ret (); | 952 | &ret (); |
| 953 | &function_end_B("_x86_AES_encrypt"); | ||
| 953 | 954 | ||
| 954 | &set_label("AES_Te",64); # Yes! I keep it in the code segment! | 955 | &rodataseg(); |
| 956 | &set_label("AES_Te",64); | ||
| 955 | &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6); | 957 | &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6); |
| 956 | &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591); | 958 | &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591); |
| 957 | &_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56); | 959 | &_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56); |
| @@ -1154,7 +1156,7 @@ sub enclast() | |||
| 1154 | &data_word(0x00000010, 0x00000020, 0x00000040, 0x00000080); | 1156 | &data_word(0x00000010, 0x00000020, 0x00000040, 0x00000080); |
| 1155 | &data_word(0x0000001b, 0x00000036, 0x00000000, 0x00000000); | 1157 | &data_word(0x0000001b, 0x00000036, 0x00000000, 0x00000000); |
| 1156 | &data_word(0x00000000, 0x00000000, 0x00000000, 0x00000000); | 1158 | &data_word(0x00000000, 0x00000000, 0x00000000, 0x00000000); |
| 1157 | &function_end_B("_x86_AES_encrypt"); | 1159 | &previous(); |
| 1158 | 1160 | ||
| 1159 | # void AES_encrypt (const void *inp,void *out,const AES_KEY *key); | 1161 | # void AES_encrypt (const void *inp,void *out,const AES_KEY *key); |
| 1160 | &function_begin("AES_encrypt"); | 1162 | &function_begin("AES_encrypt"); |
| @@ -1174,11 +1176,9 @@ sub enclast() | |||
| 1174 | &add ("esp",4); # 4 is reserved for caller's return address | 1176 | &add ("esp",4); # 4 is reserved for caller's return address |
| 1175 | &mov ($_esp,$s0); # save stack pointer | 1177 | &mov ($_esp,$s0); # save stack pointer |
| 1176 | 1178 | ||
| 1177 | &call (&label("pic_point")); # make it PIC! | 1179 | &picsetup($tbl); |
| 1178 | &set_label("pic_point"); | 1180 | &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl); |
| 1179 | &blindpop($tbl); | 1181 | &picsymbol($tbl, &label("AES_Te"), $tbl); |
| 1180 | &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if (!$x86only); | ||
| 1181 | &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl)); | ||
| 1182 | 1182 | ||
| 1183 | # pick Te4 copy which can't "overlap" with stack frame or key schedule | 1183 | # pick Te4 copy which can't "overlap" with stack frame or key schedule |
| 1184 | &lea ($s1,&DWP(768-4,"esp")); | 1184 | &lea ($s1,&DWP(768-4,"esp")); |
| @@ -1744,8 +1744,10 @@ sub declast() | |||
| 1744 | &xor ($s3,&DWP(12,$key)); | 1744 | &xor ($s3,&DWP(12,$key)); |
| 1745 | 1745 | ||
| 1746 | &ret (); | 1746 | &ret (); |
| 1747 | &function_end_B("_x86_AES_decrypt"); | ||
| 1747 | 1748 | ||
| 1748 | &set_label("AES_Td",64); # Yes! I keep it in the code segment! | 1749 | &rodataseg(); |
| 1750 | &set_label("AES_Td",64); | ||
| 1749 | &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a); | 1751 | &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a); |
| 1750 | &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b); | 1752 | &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b); |
| 1751 | &_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5); | 1753 | &_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5); |
| @@ -1943,7 +1945,7 @@ sub declast() | |||
| 1943 | &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61); | 1945 | &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61); |
| 1944 | &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26); | 1946 | &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26); |
| 1945 | &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d); | 1947 | &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d); |
| 1946 | &function_end_B("_x86_AES_decrypt"); | 1948 | &previous(); |
| 1947 | 1949 | ||
| 1948 | # void AES_decrypt (const void *inp,void *out,const AES_KEY *key); | 1950 | # void AES_decrypt (const void *inp,void *out,const AES_KEY *key); |
| 1949 | &function_begin("AES_decrypt"); | 1951 | &function_begin("AES_decrypt"); |
| @@ -1963,11 +1965,9 @@ sub declast() | |||
| 1963 | &add ("esp",4); # 4 is reserved for caller's return address | 1965 | &add ("esp",4); # 4 is reserved for caller's return address |
| 1964 | &mov ($_esp,$s0); # save stack pointer | 1966 | &mov ($_esp,$s0); # save stack pointer |
| 1965 | 1967 | ||
| 1966 | &call (&label("pic_point")); # make it PIC! | 1968 | &picsetup($tbl); |
| 1967 | &set_label("pic_point"); | 1969 | &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl); |
| 1968 | &blindpop($tbl); | 1970 | &picsymbol($tbl, &label("AES_Td"), $tbl); |
| 1969 | &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if(!$x86only); | ||
| 1970 | &lea ($tbl,&DWP(&label("AES_Td")."-".&label("pic_point"),$tbl)); | ||
| 1971 | 1971 | ||
| 1972 | # pick Td4 copy which can't "overlap" with stack frame or key schedule | 1972 | # pick Td4 copy which can't "overlap" with stack frame or key schedule |
| 1973 | &lea ($s1,&DWP(768-4,"esp")); | 1973 | &lea ($s1,&DWP(768-4,"esp")); |
| @@ -2034,13 +2034,10 @@ my $mark=&DWP(76+240,"esp"); # copy of aes_key->rounds | |||
| 2034 | &cmp ($s2,0); | 2034 | &cmp ($s2,0); |
| 2035 | &je (&label("drop_out")); | 2035 | &je (&label("drop_out")); |
| 2036 | 2036 | ||
| 2037 | &call (&label("pic_point")); # make it PIC! | 2037 | &picsetup($tbl); |
| 2038 | &set_label("pic_point"); | 2038 | &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl); |
| 2039 | &blindpop($tbl); | 2039 | &picsymbol($tbl, &label("AES_Te"), $tbl); |
| 2040 | &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if(!$x86only); | ||
| 2041 | |||
| 2042 | &cmp (&wparam(5),0); | 2040 | &cmp (&wparam(5),0); |
| 2043 | &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl)); | ||
| 2044 | &jne (&label("picked_te")); | 2041 | &jne (&label("picked_te")); |
| 2045 | &lea ($tbl,&DWP(&label("AES_Td")."-".&label("AES_Te"),$tbl)); | 2042 | &lea ($tbl,&DWP(&label("AES_Td")."-".&label("AES_Te"),$tbl)); |
| 2046 | &set_label("picked_te"); | 2043 | &set_label("picked_te"); |
| @@ -2659,10 +2656,9 @@ sub enckey() | |||
| 2659 | &test ("edi",-1); | 2656 | &test ("edi",-1); |
| 2660 | &jz (&label("badpointer")); | 2657 | &jz (&label("badpointer")); |
| 2661 | 2658 | ||
| 2662 | &call (&label("pic_point")); | 2659 | &picsetup($tbl); |
| 2663 | &set_label("pic_point"); | 2660 | &picsymbol($tbl, &label("AES_Te"), $tbl); |
| 2664 | &blindpop($tbl); | 2661 | |
| 2665 | &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl)); | ||
| 2666 | &lea ($tbl,&DWP(2048+128,$tbl)); | 2662 | &lea ($tbl,&DWP(2048+128,$tbl)); |
| 2667 | 2663 | ||
| 2668 | # prefetch Te4 | 2664 | # prefetch Te4 |
| @@ -2975,6 +2971,5 @@ sub deckey() | |||
| 2975 | 2971 | ||
| 2976 | &xor ("eax","eax"); # return success | 2972 | &xor ("eax","eax"); # return success |
| 2977 | &function_end("AES_set_decrypt_key"); | 2973 | &function_end("AES_set_decrypt_key"); |
| 2978 | &asciz("AES for x86, CRYPTOGAMS by <appro\@openssl.org>"); | ||
| 2979 | 2974 | ||
| 2980 | &asm_finish(); | 2975 | &asm_finish(); |
diff --git a/src/lib/libcrypto/aes/asm/aesni-x86.pl b/src/lib/libcrypto/aes/asm/aesni-x86.pl index 8c1d0b5bed..ff44415611 100644 --- a/src/lib/libcrypto/aes/asm/aesni-x86.pl +++ b/src/lib/libcrypto/aes/asm/aesni-x86.pl | |||
| @@ -2184,6 +2184,5 @@ if ($PREFIX eq "aesni") { | |||
| 2184 | &set_label("dec_key_ret"); | 2184 | &set_label("dec_key_ret"); |
| 2185 | &ret (); | 2185 | &ret (); |
| 2186 | &function_end_B("${PREFIX}_set_decrypt_key"); | 2186 | &function_end_B("${PREFIX}_set_decrypt_key"); |
| 2187 | &asciz("AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>"); | ||
| 2188 | 2187 | ||
| 2189 | &asm_finish(); | 2188 | &asm_finish(); |
diff --git a/src/lib/libcrypto/aes/asm/vpaes-x86.pl b/src/lib/libcrypto/aes/asm/vpaes-x86.pl index 1533e2c304..38cef61733 100644 --- a/src/lib/libcrypto/aes/asm/vpaes-x86.pl +++ b/src/lib/libcrypto/aes/asm/vpaes-x86.pl | |||
| @@ -57,6 +57,7 @@ $PREFIX="vpaes"; | |||
| 57 | my ($round, $base, $magic, $key, $const, $inp, $out)= | 57 | my ($round, $base, $magic, $key, $const, $inp, $out)= |
| 58 | ("eax", "ebx", "ecx", "edx","ebp", "esi","edi"); | 58 | ("eax", "ebx", "ecx", "edx","ebp", "esi","edi"); |
| 59 | 59 | ||
| 60 | &rodataseg(); | ||
| 60 | &static_label("_vpaes_consts"); | 61 | &static_label("_vpaes_consts"); |
| 61 | &static_label("_vpaes_schedule_low_round"); | 62 | &static_label("_vpaes_schedule_low_round"); |
| 62 | 63 | ||
| @@ -153,8 +154,7 @@ $k_dsbe=0x2a0; # decryption sbox output *E*u, *E*t | |||
| 153 | $k_dsbo=0x2c0; # decryption sbox final output | 154 | $k_dsbo=0x2c0; # decryption sbox final output |
| 154 | &data_word(0x7EF94000,0x1387EA53,0xD4943E2D,0xC7AA6DB9); | 155 | &data_word(0x7EF94000,0x1387EA53,0xD4943E2D,0xC7AA6DB9); |
| 155 | &data_word(0x93441D00,0x12D7560F,0xD8C58E9C,0xCA4B8159); | 156 | &data_word(0x93441D00,0x12D7560F,0xD8C58E9C,0xCA4B8159); |
| 156 | &asciz ("Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)"); | 157 | &previous(); |
| 157 | &align (64); | ||
| 158 | 158 | ||
| 159 | &function_begin_B("_vpaes_preheat"); | 159 | &function_begin_B("_vpaes_preheat"); |
| 160 | &add ($const,&DWP(0,"esp")); | 160 | &add ($const,&DWP(0,"esp")); |
| @@ -762,9 +762,11 @@ $k_dsbo=0x2c0; # decryption sbox final output | |||
| 762 | &mov ($magic,0x30); | 762 | &mov ($magic,0x30); |
| 763 | &mov ($out,0); | 763 | &mov ($out,0); |
| 764 | 764 | ||
| 765 | &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); | 765 | &picsetup($const); |
| 766 | &picsymbol($const, &label("_vpaes_consts"), $const); | ||
| 767 | &lea ($const,&DWP(0x30,$const)) | ||
| 768 | |||
| 766 | &call ("_vpaes_schedule_core"); | 769 | &call ("_vpaes_schedule_core"); |
| 767 | &set_label("pic_point"); | ||
| 768 | 770 | ||
| 769 | &mov ("esp",&DWP(48,"esp")); | 771 | &mov ("esp",&DWP(48,"esp")); |
| 770 | &xor ("eax","eax"); | 772 | &xor ("eax","eax"); |
| @@ -792,18 +794,22 @@ $k_dsbo=0x2c0; # decryption sbox final output | |||
| 792 | &and ($magic,32); | 794 | &and ($magic,32); |
| 793 | &xor ($magic,32); # nbist==192?0:32; | 795 | &xor ($magic,32); # nbist==192?0:32; |
| 794 | 796 | ||
| 795 | &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); | 797 | &picsetup($const); |
| 798 | &picsymbol($const, &label("_vpaes_consts"), $const); | ||
| 799 | &lea ($const,&DWP(0x30,$const)) | ||
| 800 | |||
| 796 | &call ("_vpaes_schedule_core"); | 801 | &call ("_vpaes_schedule_core"); |
| 797 | &set_label("pic_point"); | ||
| 798 | 802 | ||
| 799 | &mov ("esp",&DWP(48,"esp")); | 803 | &mov ("esp",&DWP(48,"esp")); |
| 800 | &xor ("eax","eax"); | 804 | &xor ("eax","eax"); |
| 801 | &function_end("${PREFIX}_set_decrypt_key"); | 805 | &function_end("${PREFIX}_set_decrypt_key"); |
| 802 | 806 | ||
| 803 | &function_begin("${PREFIX}_encrypt"); | 807 | &function_begin("${PREFIX}_encrypt"); |
| 804 | &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); | 808 | &picsetup($const); |
| 809 | &picsymbol($const, &label("_vpaes_consts"), $const); | ||
| 810 | &lea ($const,&DWP(0x30,$const)) | ||
| 811 | |||
| 805 | &call ("_vpaes_preheat"); | 812 | &call ("_vpaes_preheat"); |
| 806 | &set_label("pic_point"); | ||
| 807 | &mov ($inp,&wparam(0)); # inp | 813 | &mov ($inp,&wparam(0)); # inp |
| 808 | &lea ($base,&DWP(-56,"esp")); | 814 | &lea ($base,&DWP(-56,"esp")); |
| 809 | &mov ($out,&wparam(1)); # out | 815 | &mov ($out,&wparam(1)); # out |
| @@ -820,9 +826,11 @@ $k_dsbo=0x2c0; # decryption sbox final output | |||
| 820 | &function_end("${PREFIX}_encrypt"); | 826 | &function_end("${PREFIX}_encrypt"); |
| 821 | 827 | ||
| 822 | &function_begin("${PREFIX}_decrypt"); | 828 | &function_begin("${PREFIX}_decrypt"); |
| 823 | &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); | 829 | &picsetup($const); |
| 830 | &picsymbol($const, &label("_vpaes_consts"), $const); | ||
| 831 | &lea ($const,&DWP(0x30,$const)) | ||
| 832 | |||
| 824 | &call ("_vpaes_preheat"); | 833 | &call ("_vpaes_preheat"); |
| 825 | &set_label("pic_point"); | ||
| 826 | &mov ($inp,&wparam(0)); # inp | 834 | &mov ($inp,&wparam(0)); # inp |
| 827 | &lea ($base,&DWP(-56,"esp")); | 835 | &lea ($base,&DWP(-56,"esp")); |
| 828 | &mov ($out,&wparam(1)); # out | 836 | &mov ($out,&wparam(1)); # out |
| @@ -859,9 +867,11 @@ $k_dsbo=0x2c0; # decryption sbox final output | |||
| 859 | &mov (&DWP(8,"esp"),$const); # save ivp | 867 | &mov (&DWP(8,"esp"),$const); # save ivp |
| 860 | &mov ($out,$round); # $out works as $len | 868 | &mov ($out,$round); # $out works as $len |
| 861 | 869 | ||
| 862 | &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point"))); | 870 | &picsetup($const); |
| 871 | &picsymbol($const, &label("_vpaes_consts"), $const); | ||
| 872 | &lea ($const,&DWP(0x30,$const)) | ||
| 873 | |||
| 863 | &call ("_vpaes_preheat"); | 874 | &call ("_vpaes_preheat"); |
| 864 | &set_label("pic_point"); | ||
| 865 | &cmp ($magic,0); | 875 | &cmp ($magic,0); |
| 866 | &je (&label("cbc_dec_loop")); | 876 | &je (&label("cbc_dec_loop")); |
| 867 | &jmp (&label("cbc_enc_loop")); | 877 | &jmp (&label("cbc_enc_loop")); |
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl index c4e2baa6c5..b502fe60ee 100644 --- a/src/lib/libcrypto/bn/asm/bn-586.pl +++ b/src/lib/libcrypto/bn/asm/bn-586.pl | |||
| @@ -32,7 +32,8 @@ sub bn_mul_add_words | |||
| 32 | $c="ecx"; | 32 | $c="ecx"; |
| 33 | 33 | ||
| 34 | if ($sse2) { | 34 | if ($sse2) { |
| 35 | &picmeup("eax","OPENSSL_ia32cap_P"); | 35 | &picsetup("eax"); |
| 36 | &picsymbol("eax", "OPENSSL_ia32cap_P", "eax"); | ||
| 36 | &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); | 37 | &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); |
| 37 | &jnc(&label("maw_non_sse2")); | 38 | &jnc(&label("maw_non_sse2")); |
| 38 | 39 | ||
| @@ -218,7 +219,8 @@ sub bn_mul_words | |||
| 218 | $c="ecx"; | 219 | $c="ecx"; |
| 219 | 220 | ||
| 220 | if ($sse2) { | 221 | if ($sse2) { |
| 221 | &picmeup("eax","OPENSSL_ia32cap_P"); | 222 | &picsetup("eax"); |
| 223 | &picsymbol("eax", "OPENSSL_ia32cap_P", "eax"); | ||
| 222 | &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); | 224 | &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); |
| 223 | &jnc(&label("mw_non_sse2")); | 225 | &jnc(&label("mw_non_sse2")); |
| 224 | 226 | ||
| @@ -329,7 +331,8 @@ sub bn_sqr_words | |||
| 329 | $c="ecx"; | 331 | $c="ecx"; |
| 330 | 332 | ||
| 331 | if ($sse2) { | 333 | if ($sse2) { |
| 332 | &picmeup("eax","OPENSSL_ia32cap_P"); | 334 | &picsetup("eax"); |
| 335 | &picsymbol("eax", "OPENSSL_ia32cap_P", "eax"); | ||
| 333 | &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); | 336 | &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); |
| 334 | &jnc(&label("sqr_non_sse2")); | 337 | &jnc(&label("sqr_non_sse2")); |
| 335 | 338 | ||
diff --git a/src/lib/libcrypto/bn/asm/x86-gf2m.pl b/src/lib/libcrypto/bn/asm/x86-gf2m.pl index 9715b2158f..cb2f2a5c30 100644 --- a/src/lib/libcrypto/bn/asm/x86-gf2m.pl +++ b/src/lib/libcrypto/bn/asm/x86-gf2m.pl | |||
| @@ -200,7 +200,8 @@ $R="mm0"; | |||
| 200 | # void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0); | 200 | # void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0); |
| 201 | &function_begin_B("bn_GF2m_mul_2x2"); | 201 | &function_begin_B("bn_GF2m_mul_2x2"); |
| 202 | if (!$x86only) { | 202 | if (!$x86only) { |
| 203 | &picmeup("edx","OPENSSL_ia32cap_P"); | 203 | &picsetup("edx"); |
| 204 | &picsymbol("edx", "OPENSSL_ia32cap_P", "edx"); | ||
| 204 | &mov ("eax",&DWP(0,"edx")); | 205 | &mov ("eax",&DWP(0,"edx")); |
| 205 | &mov ("edx",&DWP(4,"edx")); | 206 | &mov ("edx",&DWP(4,"edx")); |
| 206 | &test ("eax","\$IA32CAP_MASK0_MMX"); # check MMX bit | 207 | &test ("eax","\$IA32CAP_MASK0_MMX"); # check MMX bit |
| @@ -308,6 +309,4 @@ if ($sse2) { | |||
| 308 | &ret (); | 309 | &ret (); |
| 309 | &function_end_B("bn_GF2m_mul_2x2"); | 310 | &function_end_B("bn_GF2m_mul_2x2"); |
| 310 | 311 | ||
| 311 | &asciz ("GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>"); | ||
| 312 | |||
| 313 | &asm_finish(); | 312 | &asm_finish(); |
diff --git a/src/lib/libcrypto/bn/asm/x86-mont.pl b/src/lib/libcrypto/bn/asm/x86-mont.pl index e6c04739b1..6524651748 100755 --- a/src/lib/libcrypto/bn/asm/x86-mont.pl +++ b/src/lib/libcrypto/bn/asm/x86-mont.pl | |||
| @@ -113,7 +113,8 @@ $mul1="mm5"; | |||
| 113 | $temp="mm6"; | 113 | $temp="mm6"; |
| 114 | $mask="mm7"; | 114 | $mask="mm7"; |
| 115 | 115 | ||
| 116 | &picmeup("eax","OPENSSL_ia32cap_P"); | 116 | &picsetup("eax"); |
| 117 | &picsymbol("eax", "OPENSSL_ia32cap_P", "eax"); | ||
| 117 | &bt (&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); | 118 | &bt (&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2"); |
| 118 | &jnc (&label("non_sse2")); | 119 | &jnc (&label("non_sse2")); |
| 119 | 120 | ||
| @@ -588,6 +589,4 @@ $sbit=$num; | |||
| 588 | &set_label("just_leave"); | 589 | &set_label("just_leave"); |
| 589 | &function_end("bn_mul_mont"); | 590 | &function_end("bn_mul_mont"); |
| 590 | 591 | ||
| 591 | &asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>"); | ||
| 592 | |||
| 593 | &asm_finish(); | 592 | &asm_finish(); |
diff --git a/src/lib/libcrypto/camellia/asm/cmll-x86.pl b/src/lib/libcrypto/camellia/asm/cmll-x86.pl index 027302ac86..a4ab11e54d 100644 --- a/src/lib/libcrypto/camellia/asm/cmll-x86.pl +++ b/src/lib/libcrypto/camellia/asm/cmll-x86.pl | |||
| @@ -141,10 +141,8 @@ my $t0=@T[($j)%4],$t1=@T[($j+1)%4],$t2=@T[($j+2)%4],$t3=@T[($j+3)%4]; | |||
| 141 | &mov ($_esp,"ebx"); # save %esp | 141 | &mov ($_esp,"ebx"); # save %esp |
| 142 | &mov ($_end,"eax"); # save keyEnd | 142 | &mov ($_end,"eax"); # save keyEnd |
| 143 | 143 | ||
| 144 | &call (&label("pic_point")); | 144 | &picsetup($Tbl); |
| 145 | &set_label("pic_point"); | 145 | &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl); |
| 146 | &blindpop($Tbl); | ||
| 147 | &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); | ||
| 148 | 146 | ||
| 149 | &mov (@T[0],&DWP(0,$idx)); # load plaintext | 147 | &mov (@T[0],&DWP(0,$idx)); # load plaintext |
| 150 | &mov (@T[1],&DWP(4,$idx)); | 148 | &mov (@T[1],&DWP(4,$idx)); |
| @@ -206,10 +204,8 @@ if ($OPENSSL) { | |||
| 206 | &mov ($_esp,"ebx"); # save %esp | 204 | &mov ($_esp,"ebx"); # save %esp |
| 207 | &mov ($_end,"eax"); # save keyEnd | 205 | &mov ($_end,"eax"); # save keyEnd |
| 208 | 206 | ||
| 209 | &call (&label("pic_point")); | 207 | &picsetup($Tbl); |
| 210 | &set_label("pic_point"); | 208 | &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl); |
| 211 | &blindpop($Tbl); | ||
| 212 | &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); | ||
| 213 | 209 | ||
| 214 | &mov (@T[0],&DWP(0,$idx)); # load plaintext | 210 | &mov (@T[0],&DWP(0,$idx)); # load plaintext |
| 215 | &mov (@T[1],&DWP(4,$idx)); | 211 | &mov (@T[1],&DWP(4,$idx)); |
| @@ -316,10 +312,8 @@ if ($OPENSSL) { | |||
| 316 | &lea ($key,&DWP(0,$key,"eax")); | 312 | &lea ($key,&DWP(0,$key,"eax")); |
| 317 | &mov (&DWP(5*4,"esp"),"ebx");# save %esp | 313 | &mov (&DWP(5*4,"esp"),"ebx");# save %esp |
| 318 | 314 | ||
| 319 | &call (&label("pic_point")); | 315 | &picsetup($Tbl); |
| 320 | &set_label("pic_point"); | 316 | &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl); |
| 321 | &blindpop($Tbl); | ||
| 322 | &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); | ||
| 323 | 317 | ||
| 324 | &mov (@T[0],&DWP(0,$idx)); # load ciphertext | 318 | &mov (@T[0],&DWP(0,$idx)); # load ciphertext |
| 325 | &mov (@T[1],&DWP(4,$idx)); | 319 | &mov (@T[1],&DWP(4,$idx)); |
| @@ -381,10 +375,8 @@ if ($OPENSSL) { | |||
| 381 | &lea ($key,&DWP(0,$key,"eax")); | 375 | &lea ($key,&DWP(0,$key,"eax")); |
| 382 | &mov (&DWP(5*4,"esp"),"ebx");# save %esp | 376 | &mov (&DWP(5*4,"esp"),"ebx");# save %esp |
| 383 | 377 | ||
| 384 | &call (&label("pic_point")); | 378 | &picsetup($Tbl); |
| 385 | &set_label("pic_point"); | 379 | &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl); |
| 386 | &blindpop($Tbl); | ||
| 387 | &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); | ||
| 388 | 380 | ||
| 389 | &mov (@T[0],&DWP(0,$idx)); # load ciphertext | 381 | &mov (@T[0],&DWP(0,$idx)); # load ciphertext |
| 390 | &mov (@T[1],&DWP(4,$idx)); | 382 | &mov (@T[1],&DWP(4,$idx)); |
| @@ -594,10 +586,8 @@ my $bias=int(@T[0])?shift(@T):0; | |||
| 594 | &xor (@T[3],&DWP(1*8+4,$key)); | 586 | &xor (@T[3],&DWP(1*8+4,$key)); |
| 595 | 587 | ||
| 596 | &set_label("1st128",4); | 588 | &set_label("1st128",4); |
| 597 | &call (&label("pic_point")); | 589 | &picsetup($Tbl); |
| 598 | &set_label("pic_point"); | 590 | &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl); |
| 599 | &blindpop($Tbl); | ||
| 600 | &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); | ||
| 601 | &lea ($key,&DWP(&label("Camellia_SIGMA")."-".&label("Camellia_SBOX"),$Tbl)); | 591 | &lea ($key,&DWP(&label("Camellia_SIGMA")."-".&label("Camellia_SBOX"),$Tbl)); |
| 602 | 592 | ||
| 603 | &mov ($idx,&DWP($step*8,$key)); # prefetch SIGMA[0] | 593 | &mov ($idx,&DWP($step*8,$key)); # prefetch SIGMA[0] |
| @@ -786,6 +776,7 @@ sub S4404 { my $i=shift; $i=($i<<1|$i>>7)&0xff; $i=@SBOX[$i]; return $i<<24|$i<< | |||
| 786 | sub S0222 { my $i=shift; $i=@SBOX[$i]; $i=($i<<1|$i>>7)&0xff; return $i<<16|$i<<8|$i; } | 776 | sub S0222 { my $i=shift; $i=@SBOX[$i]; $i=($i<<1|$i>>7)&0xff; return $i<<16|$i<<8|$i; } |
| 787 | sub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; return $i<<24|$i<<8|$i; } | 777 | sub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; return $i<<24|$i<<8|$i; } |
| 788 | 778 | ||
| 779 | &rodataseg(); | ||
| 789 | &set_label("Camellia_SIGMA",64); | 780 | &set_label("Camellia_SIGMA",64); |
| 790 | &data_word( | 781 | &data_word( |
| 791 | 0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2, | 782 | 0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2, |
| @@ -796,6 +787,7 @@ sub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; return $i<<24|$i<< | |||
| 796 | # tables are interleaved, remember? | 787 | # tables are interleaved, remember? |
| 797 | for ($i=0;$i<256;$i++) { &data_word(&S1110($i),&S4404($i)); } | 788 | for ($i=0;$i<256;$i++) { &data_word(&S1110($i),&S4404($i)); } |
| 798 | for ($i=0;$i<256;$i++) { &data_word(&S0222($i),&S3033($i)); } | 789 | for ($i=0;$i<256;$i++) { &data_word(&S0222($i),&S3033($i)); } |
| 790 | &previous(); | ||
| 799 | 791 | ||
| 800 | # void Camellia_cbc_encrypt (const void char *inp, unsigned char *out, | 792 | # void Camellia_cbc_encrypt (const void char *inp, unsigned char *out, |
| 801 | # size_t length, const CAMELLIA_KEY *key, | 793 | # size_t length, const CAMELLIA_KEY *key, |
| @@ -856,10 +848,8 @@ my ($s0,$s1,$s2,$s3) = @T; | |||
| 856 | &mov ($_key,$s3); # save copy of key | 848 | &mov ($_key,$s3); # save copy of key |
| 857 | &mov ($_ivp,$Tbl); # save copy of ivp | 849 | &mov ($_ivp,$Tbl); # save copy of ivp |
| 858 | 850 | ||
| 859 | &call (&label("pic_point")); # make it PIC! | 851 | &picsetup($Tbl); |
| 860 | &set_label("pic_point"); | 852 | &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl); |
| 861 | &blindpop($Tbl); | ||
| 862 | &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl)); | ||
| 863 | 853 | ||
| 864 | &mov ($idx,32); | 854 | &mov ($idx,32); |
| 865 | &set_label("prefetch_sbox",4); | 855 | &set_label("prefetch_sbox",4); |
| @@ -1133,6 +1123,4 @@ my ($s0,$s1,$s2,$s3) = @T; | |||
| 1133 | &function_end("Camellia_cbc_encrypt"); | 1123 | &function_end("Camellia_cbc_encrypt"); |
| 1134 | } | 1124 | } |
| 1135 | 1125 | ||
| 1136 | &asciz("Camellia for x86 by <appro\@openssl.org>"); | ||
| 1137 | |||
| 1138 | &asm_finish(); | 1126 | &asm_finish(); |
diff --git a/src/lib/libcrypto/des/asm/des-586.pl b/src/lib/libcrypto/des/asm/des-586.pl index 5b5f39cebd..e11b2ef80f 100644 --- a/src/lib/libcrypto/des/asm/des-586.pl +++ b/src/lib/libcrypto/des/asm/des-586.pl | |||
| @@ -154,11 +154,8 @@ sub DES_encrypt | |||
| 154 | &rotl($L,3); | 154 | &rotl($L,3); |
| 155 | } | 155 | } |
| 156 | 156 | ||
| 157 | # PIC-ification:-) | 157 | &picsetup($trans); |
| 158 | &call (&label("pic_point")); | 158 | &picsymbol($trans, &label("DES_SPtrans"), $trans); |
| 159 | &set_label("pic_point"); | ||
| 160 | &blindpop($trans); | ||
| 161 | &lea ($trans,&DWP(&label("DES_SPtrans")."-".&label("pic_point"),$trans)); | ||
| 162 | 159 | ||
| 163 | &mov( "ecx", &wparam(1) ); | 160 | &mov( "ecx", &wparam(1) ); |
| 164 | 161 | ||
| @@ -314,6 +311,7 @@ sub FP_new | |||
| 314 | 311 | ||
| 315 | sub DES_SPtrans | 312 | sub DES_SPtrans |
| 316 | { | 313 | { |
| 314 | &rodataseg(); | ||
| 317 | &set_label("DES_SPtrans",64); | 315 | &set_label("DES_SPtrans",64); |
| 318 | &data_word(0x02080800, 0x00080000, 0x02000002, 0x02080802); | 316 | &data_word(0x02080800, 0x00080000, 0x02000002, 0x02080802); |
| 319 | &data_word(0x02000000, 0x00080802, 0x00080002, 0x02000002); | 317 | &data_word(0x02000000, 0x00080802, 0x00080002, 0x02000002); |
| @@ -450,4 +448,5 @@ sub DES_SPtrans | |||
| 450 | &data_word(0x00820000, 0x00020080, 0x20020080, 0x20800000); | 448 | &data_word(0x00820000, 0x00020080, 0x20020080, 0x20800000); |
| 451 | &data_word(0x00000080, 0x20820000, 0x00820080, 0x00000000); | 449 | &data_word(0x00000080, 0x20820000, 0x00820080, 0x00000000); |
| 452 | &data_word(0x20000000, 0x20800080, 0x00020000, 0x00820080); | 450 | &data_word(0x20000000, 0x20800080, 0x00020000, 0x00820080); |
| 451 | &previous(); | ||
| 453 | } | 452 | } |
diff --git a/src/lib/libcrypto/modes/asm/ghash-x86.pl b/src/lib/libcrypto/modes/asm/ghash-x86.pl index 27492597ad..5e868a43ff 100644 --- a/src/lib/libcrypto/modes/asm/ghash-x86.pl +++ b/src/lib/libcrypto/modes/asm/ghash-x86.pl | |||
| @@ -411,10 +411,8 @@ $S=12; # shift factor for rem_4bit | |||
| 411 | &mov ($inp,&wparam(0)); # load Xi | 411 | &mov ($inp,&wparam(0)); # load Xi |
| 412 | &mov ($Htbl,&wparam(1)); # load Htable | 412 | &mov ($Htbl,&wparam(1)); # load Htable |
| 413 | 413 | ||
| 414 | &call (&label("pic_point")); | 414 | &picsetup("eax"); |
| 415 | &set_label("pic_point"); | 415 | &picsymbol("eax", &label("rem_4bit"), "eax"); |
| 416 | &blindpop("eax"); | ||
| 417 | &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax")); | ||
| 418 | 416 | ||
| 419 | &movz ($Zll,&BP(15,$inp)); | 417 | &movz ($Zll,&BP(15,$inp)); |
| 420 | 418 | ||
| @@ -436,10 +434,8 @@ $S=12; # shift factor for rem_4bit | |||
| 436 | &mov ($inp,&wparam(2)); # load in | 434 | &mov ($inp,&wparam(2)); # load in |
| 437 | &mov ($Zlh,&wparam(3)); # load len | 435 | &mov ($Zlh,&wparam(3)); # load len |
| 438 | 436 | ||
| 439 | &call (&label("pic_point")); | 437 | &picsetup("eax"); |
| 440 | &set_label("pic_point"); | 438 | &picsymbol("eax", &label("rem_4bit"), "eax"); |
| 441 | &blindpop("eax"); | ||
| 442 | &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax")); | ||
| 443 | 439 | ||
| 444 | &add ($Zlh,$inp); | 440 | &add ($Zlh,$inp); |
| 445 | &mov (&wparam(3),$Zlh); # len to point at the end of input | 441 | &mov (&wparam(3),$Zlh); # len to point at the end of input |
| @@ -584,10 +580,8 @@ sub mmx_loop() { | |||
| 584 | &mov ($inp,&wparam(0)); # load Xi | 580 | &mov ($inp,&wparam(0)); # load Xi |
| 585 | &mov ($Htbl,&wparam(1)); # load Htable | 581 | &mov ($Htbl,&wparam(1)); # load Htable |
| 586 | 582 | ||
| 587 | &call (&label("pic_point")); | 583 | &picsetup("eax"); |
| 588 | &set_label("pic_point"); | 584 | &picsymbol("eax", &label("rem_4bit"), "eax"); |
| 589 | &blindpop("eax"); | ||
| 590 | &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax")); | ||
| 591 | 585 | ||
| 592 | &movz ($Zll,&BP(15,$inp)); | 586 | &movz ($Zll,&BP(15,$inp)); |
| 593 | 587 | ||
| @@ -618,10 +612,9 @@ sub mmx_loop() { | |||
| 618 | &mov ("ecx",&wparam(2)); # inp | 612 | &mov ("ecx",&wparam(2)); # inp |
| 619 | &mov ("edx",&wparam(3)); # len | 613 | &mov ("edx",&wparam(3)); # len |
| 620 | &mov ("ebp","esp"); # original %esp | 614 | &mov ("ebp","esp"); # original %esp |
| 621 | &call (&label("pic_point")); | 615 | |
| 622 | &set_label ("pic_point"); | 616 | &picsetup($rem_8bit); |
| 623 | &blindpop ($rem_8bit); | 617 | &picsymbol($rem_8bit, &label("rem_8bit"), $rem_8bit); |
| 624 | &lea ($rem_8bit,&DWP(&label("rem_8bit")."-".&label("pic_point"),$rem_8bit)); | ||
| 625 | 618 | ||
| 626 | &sub ("esp",512+16+16); # allocate stack frame... | 619 | &sub ("esp",512+16+16); # allocate stack frame... |
| 627 | &and ("esp",-64); # ...and align it | 620 | &and ("esp",-64); # ...and align it |
| @@ -910,10 +903,8 @@ my ($Xhi,$Xi) = @_; | |||
| 910 | &mov ($Htbl,&wparam(0)); | 903 | &mov ($Htbl,&wparam(0)); |
| 911 | &mov ($Xip,&wparam(1)); | 904 | &mov ($Xip,&wparam(1)); |
| 912 | 905 | ||
| 913 | &call (&label("pic")); | 906 | &picsetup($const); |
| 914 | &set_label("pic"); | 907 | &picsymbol($const, &label("bswap"), $const); |
| 915 | &blindpop ($const); | ||
| 916 | &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); | ||
| 917 | 908 | ||
| 918 | &movdqu ($Hkey,&QWP(0,$Xip)); | 909 | &movdqu ($Hkey,&QWP(0,$Xip)); |
| 919 | &pshufd ($Hkey,$Hkey,0b01001110);# dword swap | 910 | &pshufd ($Hkey,$Hkey,0b01001110);# dword swap |
| @@ -947,10 +938,8 @@ my ($Xhi,$Xi) = @_; | |||
| 947 | &mov ($Xip,&wparam(0)); | 938 | &mov ($Xip,&wparam(0)); |
| 948 | &mov ($Htbl,&wparam(1)); | 939 | &mov ($Htbl,&wparam(1)); |
| 949 | 940 | ||
| 950 | &call (&label("pic")); | 941 | &picsetup($const); |
| 951 | &set_label("pic"); | 942 | &picsymbol($const, &label("bswap"), $const); |
| 952 | &blindpop ($const); | ||
| 953 | &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); | ||
| 954 | 943 | ||
| 955 | &movdqu ($Xi,&QWP(0,$Xip)); | 944 | &movdqu ($Xi,&QWP(0,$Xip)); |
| 956 | &movdqa ($T3,&QWP(0,$const)); | 945 | &movdqa ($T3,&QWP(0,$const)); |
| @@ -972,10 +961,8 @@ my ($Xhi,$Xi) = @_; | |||
| 972 | &mov ($inp,&wparam(2)); | 961 | &mov ($inp,&wparam(2)); |
| 973 | &mov ($len,&wparam(3)); | 962 | &mov ($len,&wparam(3)); |
| 974 | 963 | ||
| 975 | &call (&label("pic")); | 964 | &picsetup($const); |
| 976 | &set_label("pic"); | 965 | &picsymbol($const, &label("bswap"), $const); |
| 977 | &blindpop ($const); | ||
| 978 | &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); | ||
| 979 | 966 | ||
| 980 | &movdqu ($Xi,&QWP(0,$Xip)); | 967 | &movdqu ($Xi,&QWP(0,$Xip)); |
| 981 | &movdqa ($T3,&QWP(0,$const)); | 968 | &movdqa ($T3,&QWP(0,$const)); |
| @@ -1138,10 +1125,8 @@ my ($Xhi,$Xi)=@_; | |||
| 1138 | &mov ($Htbl,&wparam(0)); | 1125 | &mov ($Htbl,&wparam(0)); |
| 1139 | &mov ($Xip,&wparam(1)); | 1126 | &mov ($Xip,&wparam(1)); |
| 1140 | 1127 | ||
| 1141 | &call (&label("pic")); | 1128 | &picsetup($const); |
| 1142 | &set_label("pic"); | 1129 | &picsymbol($const, &label("bswap"), $const); |
| 1143 | &blindpop ($const); | ||
| 1144 | &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); | ||
| 1145 | 1130 | ||
| 1146 | &movdqu ($Hkey,&QWP(0,$Xip)); | 1131 | &movdqu ($Hkey,&QWP(0,$Xip)); |
| 1147 | &pshufd ($Hkey,$Hkey,0b01001110);# dword swap | 1132 | &pshufd ($Hkey,$Hkey,0b01001110);# dword swap |
| @@ -1161,10 +1146,8 @@ my ($Xhi,$Xi)=@_; | |||
| 1161 | &mov ($Xip,&wparam(0)); | 1146 | &mov ($Xip,&wparam(0)); |
| 1162 | &mov ($Htbl,&wparam(1)); | 1147 | &mov ($Htbl,&wparam(1)); |
| 1163 | 1148 | ||
| 1164 | &call (&label("pic")); | 1149 | &picsetup($const); |
| 1165 | &set_label("pic"); | 1150 | &picsymbol($const, &label("bswap"), $const); |
| 1166 | &blindpop ($const); | ||
| 1167 | &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); | ||
| 1168 | 1151 | ||
| 1169 | &movdqu ($Xi,&QWP(0,$Xip)); | 1152 | &movdqu ($Xi,&QWP(0,$Xip)); |
| 1170 | &movdqa ($Xn,&QWP(0,$const)); | 1153 | &movdqa ($Xn,&QWP(0,$const)); |
| @@ -1186,10 +1169,8 @@ my ($Xhi,$Xi)=@_; | |||
| 1186 | &mov ($inp,&wparam(2)); | 1169 | &mov ($inp,&wparam(2)); |
| 1187 | &mov ($len,&wparam(3)); | 1170 | &mov ($len,&wparam(3)); |
| 1188 | 1171 | ||
| 1189 | &call (&label("pic")); | 1172 | &picsetup($const); |
| 1190 | &set_label("pic"); | 1173 | &picsymbol($const, &label("bswap"), $const); |
| 1191 | &blindpop ($const); | ||
| 1192 | &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const)); | ||
| 1193 | 1174 | ||
| 1194 | &movdqu ($Xi,&QWP(0,$Xip)); | 1175 | &movdqu ($Xi,&QWP(0,$Xip)); |
| 1195 | &movdqa ($T3,&QWP(0,$const)); | 1176 | &movdqa ($T3,&QWP(0,$const)); |
| @@ -1270,11 +1251,14 @@ my ($Xhi,$Xi)=@_; | |||
| 1270 | 1251 | ||
| 1271 | } | 1252 | } |
| 1272 | 1253 | ||
| 1254 | &rodataseg(); | ||
| 1273 | &set_label("bswap",64); | 1255 | &set_label("bswap",64); |
| 1274 | &data_byte(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); | 1256 | &data_byte(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0); |
| 1275 | &data_byte(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2); # 0x1c2_polynomial | 1257 | &data_byte(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2); # 0x1c2_polynomial |
| 1258 | &previous(); | ||
| 1276 | }} # $sse2 | 1259 | }} # $sse2 |
| 1277 | 1260 | ||
| 1261 | &rodataseg(); | ||
| 1278 | &set_label("rem_4bit",64); | 1262 | &set_label("rem_4bit",64); |
| 1279 | &data_word(0,0x0000<<$S,0,0x1C20<<$S,0,0x3840<<$S,0,0x2460<<$S); | 1263 | &data_word(0,0x0000<<$S,0,0x1C20<<$S,0,0x3840<<$S,0,0x2460<<$S); |
| 1280 | &data_word(0,0x7080<<$S,0,0x6CA0<<$S,0,0x48C0<<$S,0,0x54E0<<$S); | 1264 | &data_word(0,0x7080<<$S,0,0x6CA0<<$S,0,0x48C0<<$S,0,0x54E0<<$S); |
| @@ -1313,9 +1297,9 @@ my ($Xhi,$Xi)=@_; | |||
| 1313 | &data_short(0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E); | 1297 | &data_short(0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E); |
| 1314 | &data_short(0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE); | 1298 | &data_short(0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE); |
| 1315 | &data_short(0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE); | 1299 | &data_short(0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE); |
| 1300 | &previous(); | ||
| 1316 | }}} # !$x86only | 1301 | }}} # !$x86only |
| 1317 | 1302 | ||
| 1318 | &asciz("GHASH for x86, CRYPTOGAMS by <appro\@openssl.org>"); | ||
| 1319 | &asm_finish(); | 1303 | &asm_finish(); |
| 1320 | 1304 | ||
| 1321 | # A question was raised about choice of vanilla MMX. Or rather why wasn't | 1305 | # A question was raised about choice of vanilla MMX. Or rather why wasn't |
diff --git a/src/lib/libcrypto/perlasm/cbc.pl b/src/lib/libcrypto/perlasm/cbc.pl index 24561e759a..392f23e145 100644 --- a/src/lib/libcrypto/perlasm/cbc.pl +++ b/src/lib/libcrypto/perlasm/cbc.pl | |||
| @@ -34,6 +34,15 @@ sub cbc | |||
| 34 | # p1,p2,p3 are the offsets for parameters to be passed to the | 34 | # p1,p2,p3 are the offsets for parameters to be passed to the |
| 35 | # underlying calls. | 35 | # underlying calls. |
| 36 | 36 | ||
| 37 | &static_label("cbc_enc_jmp_table_".$name); | ||
| 38 | &static_label("ej1_".$name); | ||
| 39 | &static_label("ej2_".$name); | ||
| 40 | &static_label("ej3_".$name); | ||
| 41 | &static_label("ej4_".$name); | ||
| 42 | &static_label("ej5_".$name); | ||
| 43 | &static_label("ej6_".$name); | ||
| 44 | &static_label("ej7_".$name); | ||
| 45 | |||
| 37 | &function_begin_B($name,""); | 46 | &function_begin_B($name,""); |
| 38 | &comment(""); | 47 | &comment(""); |
| 39 | 48 | ||
| @@ -146,33 +155,32 @@ sub cbc | |||
| 146 | &mov($count, &wparam(2)); # length | 155 | &mov($count, &wparam(2)); # length |
| 147 | &and($count, 7); | 156 | &and($count, 7); |
| 148 | &jz(&label("finish")); | 157 | &jz(&label("finish")); |
| 149 | &call(&label("PIC_point")); | 158 | |
| 150 | &set_label("PIC_point"); | 159 | &picsetup("edx"); |
| 151 | &blindpop("edx"); | 160 | &picsymbol("ecx", &label("cbc_enc_jmp_table_".$name), "edx"); |
| 152 | &lea("ecx",&DWP(&label("cbc_enc_jmp_table")."-".&label("PIC_point"),"edx")); | ||
| 153 | &mov($count,&DWP(0,"ecx",$count,4)); | 161 | &mov($count,&DWP(0,"ecx",$count,4)); |
| 154 | &add($count,"edx"); | 162 | &picadjust($count, "edx"); |
| 163 | |||
| 155 | &xor("ecx","ecx"); | 164 | &xor("ecx","ecx"); |
| 156 | &xor("edx","edx"); | 165 | &xor("edx","edx"); |
| 157 | #&mov($count,&DWP(&label("cbc_enc_jmp_table"),"",$count,4)); | ||
| 158 | &jmp_ptr($count); | 166 | &jmp_ptr($count); |
| 159 | 167 | ||
| 160 | &set_label("ej7"); | 168 | &set_label("ej7_".$name); |
| 161 | &movb(&HB("edx"), &BP(6,$in,"",0)); | 169 | &movb(&HB("edx"), &BP(6,$in,"",0)); |
| 162 | &shl("edx",8); | 170 | &shl("edx",8); |
| 163 | &set_label("ej6"); | 171 | &set_label("ej6_".$name); |
| 164 | &movb(&HB("edx"), &BP(5,$in,"",0)); | 172 | &movb(&HB("edx"), &BP(5,$in,"",0)); |
| 165 | &set_label("ej5"); | 173 | &set_label("ej5_".$name); |
| 166 | &movb(&LB("edx"), &BP(4,$in,"",0)); | 174 | &movb(&LB("edx"), &BP(4,$in,"",0)); |
| 167 | &set_label("ej4"); | 175 | &set_label("ej4_".$name); |
| 168 | &mov("ecx", &DWP(0,$in,"",0)); | 176 | &mov("ecx", &DWP(0,$in,"",0)); |
| 169 | &jmp(&label("ejend")); | 177 | &jmp(&label("ejend")); |
| 170 | &set_label("ej3"); | 178 | &set_label("ej3_".$name); |
| 171 | &movb(&HB("ecx"), &BP(2,$in,"",0)); | 179 | &movb(&HB("ecx"), &BP(2,$in,"",0)); |
| 172 | &shl("ecx",8); | 180 | &shl("ecx",8); |
| 173 | &set_label("ej2"); | 181 | &set_label("ej2_".$name); |
| 174 | &movb(&HB("ecx"), &BP(1,$in,"",0)); | 182 | &movb(&HB("ecx"), &BP(1,$in,"",0)); |
| 175 | &set_label("ej1"); | 183 | &set_label("ej1_".$name); |
| 176 | &movb(&LB("ecx"), &BP(0,$in,"",0)); | 184 | &movb(&LB("ecx"), &BP(0,$in,"",0)); |
| 177 | &set_label("ejend"); | 185 | &set_label("ejend"); |
| 178 | 186 | ||
| @@ -279,30 +287,14 @@ sub cbc | |||
| 279 | &mov("eax", &DWP(0,$in,"",0)); # get old cipher text, | 287 | &mov("eax", &DWP(0,$in,"",0)); # get old cipher text, |
| 280 | &mov("ebx", &DWP(4,$in,"",0)); # next iv actually | 288 | &mov("ebx", &DWP(4,$in,"",0)); # next iv actually |
| 281 | 289 | ||
| 282 | &set_label("dj7"); | ||
| 283 | &rotr("edx", 16); | 290 | &rotr("edx", 16); |
| 284 | &movb(&BP(6,$out,"",0), &LB("edx")); | 291 | &movb(&BP(6,$out,"",0), &LB("edx")); |
| 285 | &shr("edx",16); | 292 | &shr("edx",16); |
| 286 | &set_label("dj6"); | ||
| 287 | &movb(&BP(5,$out,"",0), &HB("edx")); | 293 | &movb(&BP(5,$out,"",0), &HB("edx")); |
| 288 | &set_label("dj5"); | ||
| 289 | &movb(&BP(4,$out,"",0), &LB("edx")); | 294 | &movb(&BP(4,$out,"",0), &LB("edx")); |
| 290 | &set_label("dj4"); | ||
| 291 | &mov(&DWP(0,$out,"",0), "ecx"); | 295 | &mov(&DWP(0,$out,"",0), "ecx"); |
| 292 | &jmp(&label("djend")); | ||
| 293 | &set_label("dj3"); | ||
| 294 | &rotr("ecx", 16); | ||
| 295 | &movb(&BP(2,$out,"",0), &LB("ecx")); | ||
| 296 | &shl("ecx",16); | ||
| 297 | &set_label("dj2"); | ||
| 298 | &movb(&BP(1,$in,"",0), &HB("ecx")); | ||
| 299 | &set_label("dj1"); | ||
| 300 | &movb(&BP(0,$in,"",0), &LB("ecx")); | ||
| 301 | &set_label("djend"); | ||
| 302 | 296 | ||
| 303 | # final iv is still in eax:ebx | 297 | # final iv is still in eax:ebx |
| 304 | &jmp(&label("finish")); | ||
| 305 | |||
| 306 | 298 | ||
| 307 | ############################ FINISH ######################## | 299 | ############################ FINISH ######################## |
| 308 | &set_label("finish",1); | 300 | &set_label("finish",1); |
| @@ -319,31 +311,21 @@ sub cbc | |||
| 319 | &mov(&DWP(4,"ecx","",0), "ebx"); # save iv | 311 | &mov(&DWP(4,"ecx","",0), "ebx"); # save iv |
| 320 | 312 | ||
| 321 | &function_end_A($name); | 313 | &function_end_A($name); |
| 314 | &function_end_B($name); | ||
| 322 | 315 | ||
| 316 | &rodataseg(); | ||
| 323 | &align(64); | 317 | &align(64); |
| 324 | &set_label("cbc_enc_jmp_table"); | 318 | &set_label("cbc_enc_jmp_table_".$name); |
| 325 | &data_word("0"); | 319 | &data_word("0"); |
| 326 | &data_word(&label("ej1")."-".&label("PIC_point")); | 320 | &data_word(&code_sym(&label("ej1_".$name))); |
| 327 | &data_word(&label("ej2")."-".&label("PIC_point")); | 321 | &data_word(&code_sym(&label("ej2_".$name))); |
| 328 | &data_word(&label("ej3")."-".&label("PIC_point")); | 322 | &data_word(&code_sym(&label("ej3_".$name))); |
| 329 | &data_word(&label("ej4")."-".&label("PIC_point")); | 323 | &data_word(&code_sym(&label("ej4_".$name))); |
| 330 | &data_word(&label("ej5")."-".&label("PIC_point")); | 324 | &data_word(&code_sym(&label("ej5_".$name))); |
| 331 | &data_word(&label("ej6")."-".&label("PIC_point")); | 325 | &data_word(&code_sym(&label("ej6_".$name))); |
| 332 | &data_word(&label("ej7")."-".&label("PIC_point")); | 326 | &data_word(&code_sym(&label("ej7_".$name))); |
| 333 | # not used | 327 | &previous(); |
| 334 | #&set_label("cbc_dec_jmp_table",1); | ||
| 335 | #&data_word("0"); | ||
| 336 | #&data_word(&label("dj1")."-".&label("PIC_point")); | ||
| 337 | #&data_word(&label("dj2")."-".&label("PIC_point")); | ||
| 338 | #&data_word(&label("dj3")."-".&label("PIC_point")); | ||
| 339 | #&data_word(&label("dj4")."-".&label("PIC_point")); | ||
| 340 | #&data_word(&label("dj5")."-".&label("PIC_point")); | ||
| 341 | #&data_word(&label("dj6")."-".&label("PIC_point")); | ||
| 342 | #&data_word(&label("dj7")."-".&label("PIC_point")); | ||
| 343 | &align(64); | ||
| 344 | 328 | ||
| 345 | &function_end_B($name); | ||
| 346 | |||
| 347 | } | 329 | } |
| 348 | 330 | ||
| 349 | 1; | 331 | 1; |
diff --git a/src/lib/libcrypto/perlasm/x86gas.pl b/src/lib/libcrypto/perlasm/x86gas.pl index ca644ba553..f28a590549 100644 --- a/src/lib/libcrypto/perlasm/x86gas.pl +++ b/src/lib/libcrypto/perlasm/x86gas.pl | |||
| @@ -177,34 +177,52 @@ sub ::align | |||
| 177 | push(@out,".align\t$val\n"); | 177 | push(@out,".align\t$val\n"); |
| 178 | } | 178 | } |
| 179 | 179 | ||
| 180 | sub ::picmeup | 180 | # |
| 181 | { my($dst,$sym,$base,$reflabel)=@_; | 181 | # PIC data access wrappers |
| 182 | 182 | # | |
| 183 | if ($::openbsd) | 183 | # Usage: |
| 184 | { &::emitraw("#if defined(PIC) || defined(__PIC__)"); | 184 | # picsetup($base) |
| 185 | &::emitraw("PIC_PROLOGUE"); | 185 | # - only allowed once per function (because of hardcoded label name), |
| 186 | &::mov($dst, &::DWP("PIC_GOT($sym)")); | 186 | # sets up pic access, uses $base register as temporary |
| 187 | &::emitraw("PIC_EPILOGUE"); | 187 | # picsymbol($dst, $sym, $base) |
| 188 | &::emitraw("#else /* PIC */"); | 188 | # - loads the address of symbol $sym into $dst with the help of $base |
| 189 | &::lea($dst,&::DWP($sym)); | 189 | # initialized by picsetup |
| 190 | &::emitraw("#endif /* PIC */"); | 190 | # picadjust($sym, $base) |
| 191 | } | 191 | # - adjusts a code pointer read from a code_sym table with the help of |
| 192 | elsif (($::pic && ($::elf || $::aout)) || $::macosx) | 192 | # $base initialized by picsetup |
| 193 | { if (!defined($base)) | 193 | # code_sym($sym) |
| 194 | { &::call(&::label("PIC_me_up")); | 194 | # - emits a pointer to the given code symbol, relative to the GOT if |
| 195 | &::set_label("PIC_me_up"); | 195 | # PIC. This pointer will need to be adjusted with picadjust above |
| 196 | &::blindpop($dst); | 196 | # before use. |
| 197 | $base=$dst; | 197 | |
| 198 | $reflabel=&::label("PIC_me_up"); | 198 | sub ::picsetup |
| 199 | } | 199 | { my($base)=@_; |
| 200 | |||
| 201 | if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx) | ||
| 202 | { | ||
| 203 | &::call(&::label("PIC_setup")); | ||
| 204 | &::set_label("PIC_setup"); | ||
| 205 | &::blindpop($base); | ||
| 200 | if ($::macosx) | 206 | if ($::macosx) |
| 201 | { my $indirect=&::static_label("$nmdecor$sym\$non_lazy_ptr"); | 207 | { my $indirect=&::static_label("$nmdecor$sym\$non_lazy_ptr"); |
| 202 | &::mov($dst,&::DWP("$indirect-$reflabel",$base)); | ||
| 203 | $non_lazy_ptr{"$nmdecor$sym"}=$indirect; | 208 | $non_lazy_ptr{"$nmdecor$sym"}=$indirect; |
| 204 | } | 209 | } |
| 210 | } | ||
| 211 | } | ||
| 212 | |||
| 213 | sub ::picsymbol | ||
| 214 | { my($dst,$sym,$base)=@_; | ||
| 215 | |||
| 216 | if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx) | ||
| 217 | { | ||
| 218 | my $reflabel=&::label("PIC_setup"); | ||
| 219 | if ($::macosx) | ||
| 220 | { my $indirect=$non_lazy_ptr{"$nmdecor$sym"}; | ||
| 221 | &::mov($dst,&::DWP("$indirect-$reflabel",$base)); | ||
| 222 | } | ||
| 205 | else | 223 | else |
| 206 | { &::lea($dst,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]", | 224 | { &::lea($dst,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]", |
| 207 | $base)); | 225 | $base)); |
| 208 | &::mov($dst,&::DWP("$sym\@GOT",$dst)); | 226 | &::mov($dst,&::DWP("$sym\@GOT",$dst)); |
| 209 | } | 227 | } |
| 210 | } | 228 | } |
| @@ -212,6 +230,30 @@ sub ::picmeup | |||
| 212 | { &::lea($dst,&::DWP($sym)); } | 230 | { &::lea($dst,&::DWP($sym)); } |
| 213 | } | 231 | } |
| 214 | 232 | ||
| 233 | sub ::picadjust | ||
| 234 | { my($sym,$base)=@_; | ||
| 235 | |||
| 236 | if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx) | ||
| 237 | { | ||
| 238 | my $reflabel=&::label("PIC_setup"); | ||
| 239 | &::lea($sym,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]", | ||
| 240 | $base,$sym)); | ||
| 241 | } | ||
| 242 | } | ||
| 243 | |||
| 244 | sub ::code_sym | ||
| 245 | { my($sym)=@_; | ||
| 246 | |||
| 247 | if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx) | ||
| 248 | { | ||
| 249 | $sym."\@GOTOFF"; | ||
| 250 | } | ||
| 251 | else | ||
| 252 | { | ||
| 253 | $sym; | ||
| 254 | } | ||
| 255 | } | ||
| 256 | |||
| 215 | sub ::initseg | 257 | sub ::initseg |
| 216 | { my $f=$nmdecor.shift; | 258 | { my $f=$nmdecor.shift; |
| 217 | 259 | ||
| @@ -264,4 +306,10 @@ ___ | |||
| 264 | sub ::dataseg | 306 | sub ::dataseg |
| 265 | { push(@out,".data\n"); } | 307 | { push(@out,".data\n"); } |
| 266 | 308 | ||
| 309 | sub ::rodataseg | ||
| 310 | { push(@out,".rodata\n"); } | ||
| 311 | |||
| 312 | sub ::previous | ||
| 313 | { push(@out,".previous\n"); } | ||
| 314 | |||
| 267 | 1; | 315 | 1; |
diff --git a/src/lib/libcrypto/rc4/asm/rc4-586.pl b/src/lib/libcrypto/rc4/asm/rc4-586.pl index f3c3e117bc..4991c37c2c 100644 --- a/src/lib/libcrypto/rc4/asm/rc4-586.pl +++ b/src/lib/libcrypto/rc4/asm/rc4-586.pl | |||
| @@ -188,7 +188,8 @@ if ($alt=0) { | |||
| 188 | &mov (&wparam(3),$out); # $out as accumulator in these loops | 188 | &mov (&wparam(3),$out); # $out as accumulator in these loops |
| 189 | &jz (&label("go4loop4")); | 189 | &jz (&label("go4loop4")); |
| 190 | 190 | ||
| 191 | &picmeup($out,"OPENSSL_ia32cap_P"); | 191 | &picsetup($out); |
| 192 | &picsymbol($out, "OPENSSL_ia32cap_P", $out); | ||
| 192 | # check SSE2 bit [could have been MMX] | 193 | # check SSE2 bit [could have been MMX] |
| 193 | &bt (&DWP(0,$out),"\$IA32CAP_BIT0_SSE2"); | 194 | &bt (&DWP(0,$out),"\$IA32CAP_BIT0_SSE2"); |
| 194 | &jnc (&label("go4loop4")); | 195 | &jnc (&label("go4loop4")); |
| @@ -305,7 +306,9 @@ $idx="edx"; | |||
| 305 | &mov ($out,&wparam(0)); # load key | 306 | &mov ($out,&wparam(0)); # load key |
| 306 | &mov ($idi,&wparam(1)); # load len | 307 | &mov ($idi,&wparam(1)); # load len |
| 307 | &mov ($inp,&wparam(2)); # load data | 308 | &mov ($inp,&wparam(2)); # load data |
| 308 | &picmeup($idx,"OPENSSL_ia32cap_P"); | 309 | |
| 310 | &picsetup($idx); | ||
| 311 | &picsymbol($idx, "OPENSSL_ia32cap_P", $idx); | ||
| 309 | 312 | ||
| 310 | &lea ($out,&DWP(2*4,$out)); # &key->data | 313 | &lea ($out,&DWP(2*4,$out)); # &key->data |
| 311 | &lea ($inp,&DWP(0,$inp,$idi)); # $inp to point at the end | 314 | &lea ($inp,&DWP(0,$inp,$idi)); # $inp to point at the end |
| @@ -382,12 +385,12 @@ $idx="edx"; | |||
| 382 | &function_end("RC4_set_key"); | 385 | &function_end("RC4_set_key"); |
| 383 | 386 | ||
| 384 | # const char *RC4_options(void); | 387 | # const char *RC4_options(void); |
| 388 | &static_label("opts"); | ||
| 385 | &function_begin_B("RC4_options"); | 389 | &function_begin_B("RC4_options"); |
| 386 | &call (&label("pic_point")); | 390 | &picsetup("edx"); |
| 387 | &set_label("pic_point"); | 391 | &picsymbol("eax", &label("opts"), "edx"); |
| 388 | &blindpop("eax"); | 392 | &picsymbol("edx", "OPENSSL_ia32cap_P", "edx"); |
| 389 | &lea ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax")); | 393 | |
| 390 | &picmeup("edx","OPENSSL_ia32cap_P"); | ||
| 391 | &mov ("edx",&DWP(0,"edx")); | 394 | &mov ("edx",&DWP(0,"edx")); |
| 392 | &bt ("edx","\$IA32CAP_BIT0_INTELP4"); | 395 | &bt ("edx","\$IA32CAP_BIT0_INTELP4"); |
| 393 | &jc (&label("1xchar")); | 396 | &jc (&label("1xchar")); |
| @@ -399,13 +402,14 @@ $idx="edx"; | |||
| 399 | &add ("eax",12); | 402 | &add ("eax",12); |
| 400 | &set_label("ret"); | 403 | &set_label("ret"); |
| 401 | &ret (); | 404 | &ret (); |
| 402 | &set_label("opts",64); | 405 | &function_end_B("RC4_options"); |
| 406 | |||
| 407 | &rodataseg(); | ||
| 408 | &set_label("opts"); | ||
| 403 | &asciz ("rc4(4x,int)"); | 409 | &asciz ("rc4(4x,int)"); |
| 404 | &asciz ("rc4(1x,char)"); | 410 | &asciz ("rc4(1x,char)"); |
| 405 | &asciz ("rc4(8x,mmx)"); | 411 | &asciz ("rc4(8x,mmx)"); |
| 406 | &asciz ("RC4 for x86, CRYPTOGAMS by <appro\@openssl.org>"); | 412 | &previous(); |
| 407 | &align (64); | ||
| 408 | &function_end_B("RC4_options"); | ||
| 409 | 413 | ||
| 410 | &asm_finish(); | 414 | &asm_finish(); |
| 411 | 415 | ||
diff --git a/src/lib/libcrypto/sha/asm/sha1-586.pl b/src/lib/libcrypto/sha/asm/sha1-586.pl index 1de5e2650e..5928e083c1 100644 --- a/src/lib/libcrypto/sha/asm/sha1-586.pl +++ b/src/lib/libcrypto/sha/asm/sha1-586.pl | |||
| @@ -295,11 +295,9 @@ if ($xmm) { | |||
| 295 | &static_label("avx_shortcut") if ($ymm); | 295 | &static_label("avx_shortcut") if ($ymm); |
| 296 | &static_label("K_XX_XX"); | 296 | &static_label("K_XX_XX"); |
| 297 | 297 | ||
| 298 | &call (&label("pic_point")); # make it PIC! | 298 | &picsetup($tmp1); |
| 299 | &set_label("pic_point"); | 299 | &picsymbol($T, "OPENSSL_ia32cap_P", $tmp1); |
| 300 | &blindpop($tmp1); | 300 | &picsymbol($tmp1, &label("K_XX_XX"), $tmp1); |
| 301 | &picmeup($T,"OPENSSL_ia32cap_P",$tmp1,&label("pic_point")); | ||
| 302 | &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); | ||
| 303 | 301 | ||
| 304 | &mov ($A,&DWP(0,$T)); | 302 | &mov ($A,&DWP(0,$T)); |
| 305 | &mov ($D,&DWP(4,$T)); | 303 | &mov ($D,&DWP(4,$T)); |
| @@ -419,10 +417,9 @@ my $_rol=sub { &rol(@_) }; | |||
| 419 | my $_ror=sub { &ror(@_) }; | 417 | my $_ror=sub { &ror(@_) }; |
| 420 | 418 | ||
| 421 | &function_begin("_sha1_block_data_order_ssse3"); | 419 | &function_begin("_sha1_block_data_order_ssse3"); |
| 422 | &call (&label("pic_point")); # make it PIC! | 420 | &picsetup($tmp1); |
| 423 | &set_label("pic_point"); | 421 | &picsymbol($tmp1, &label("K_XX_XX"), $tmp1); |
| 424 | &blindpop($tmp1); | 422 | |
| 425 | &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); | ||
| 426 | &set_label("ssse3_shortcut"); | 423 | &set_label("ssse3_shortcut"); |
| 427 | 424 | ||
| 428 | &movdqa (@X[3],&QWP(0,$tmp1)); # K_00_19 | 425 | &movdqa (@X[3],&QWP(0,$tmp1)); # K_00_19 |
| @@ -861,10 +858,9 @@ my $_rol=sub { &shld(@_[0],@_) }; | |||
| 861 | my $_ror=sub { &shrd(@_[0],@_) }; | 858 | my $_ror=sub { &shrd(@_[0],@_) }; |
| 862 | 859 | ||
| 863 | &function_begin("_sha1_block_data_order_avx"); | 860 | &function_begin("_sha1_block_data_order_avx"); |
| 864 | &call (&label("pic_point")); # make it PIC! | 861 | &picsetup($tmp1); |
| 865 | &set_label("pic_point"); | 862 | &picsymbol($tmp1, &label("K_XX_XX"), $tmp1); |
| 866 | &blindpop($tmp1); | 863 | |
| 867 | &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1)); | ||
| 868 | &set_label("avx_shortcut"); | 864 | &set_label("avx_shortcut"); |
| 869 | &vzeroall(); | 865 | &vzeroall(); |
| 870 | 866 | ||
| @@ -1213,13 +1209,15 @@ sub Xtail_avx() | |||
| 1213 | &mov (&DWP(16,@T[1]),$E); | 1209 | &mov (&DWP(16,@T[1]),$E); |
| 1214 | &function_end("_sha1_block_data_order_avx"); | 1210 | &function_end("_sha1_block_data_order_avx"); |
| 1215 | } | 1211 | } |
| 1212 | |||
| 1213 | &rodataseg(); | ||
| 1216 | &set_label("K_XX_XX",64); | 1214 | &set_label("K_XX_XX",64); |
| 1217 | &data_word(0x5a827999,0x5a827999,0x5a827999,0x5a827999); # K_00_19 | 1215 | &data_word(0x5a827999,0x5a827999,0x5a827999,0x5a827999); # K_00_19 |
| 1218 | &data_word(0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1); # K_20_39 | 1216 | &data_word(0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1); # K_20_39 |
| 1219 | &data_word(0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc); # K_40_59 | 1217 | &data_word(0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc); # K_40_59 |
| 1220 | &data_word(0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6); # K_60_79 | 1218 | &data_word(0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6); # K_60_79 |
| 1221 | &data_word(0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f); # pbswap mask | 1219 | &data_word(0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f); # pbswap mask |
| 1220 | &previous(); | ||
| 1222 | } | 1221 | } |
| 1223 | &asciz("SHA1 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>"); | ||
| 1224 | 1222 | ||
| 1225 | &asm_finish(); | 1223 | &asm_finish(); |
diff --git a/src/lib/libcrypto/sha/asm/sha256-586.pl b/src/lib/libcrypto/sha/asm/sha256-586.pl index 928ec53123..2b05c96063 100644 --- a/src/lib/libcrypto/sha/asm/sha256-586.pl +++ b/src/lib/libcrypto/sha/asm/sha256-586.pl | |||
| @@ -96,16 +96,15 @@ sub BODY_00_15() { | |||
| 96 | &add ($A,"esi"); # h += K256[i] | 96 | &add ($A,"esi"); # h += K256[i] |
| 97 | } | 97 | } |
| 98 | 98 | ||
| 99 | &static_label("K256"); | ||
| 99 | &function_begin("sha256_block_data_order"); | 100 | &function_begin("sha256_block_data_order"); |
| 100 | &mov ("esi",wparam(0)); # ctx | 101 | &mov ("esi",wparam(0)); # ctx |
| 101 | &mov ("edi",wparam(1)); # inp | 102 | &mov ("edi",wparam(1)); # inp |
| 102 | &mov ("eax",wparam(2)); # num | 103 | &mov ("eax",wparam(2)); # num |
| 103 | &mov ("ebx","esp"); # saved sp | 104 | &mov ("ebx","esp"); # saved sp |
| 104 | 105 | ||
| 105 | &call (&label("pic_point")); # make it PIC! | 106 | &picsetup($K256); |
| 106 | &set_label("pic_point"); | 107 | &picsymbol($K256, &label("K256"), $K256); |
| 107 | &blindpop($K256); | ||
| 108 | &lea ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256)); | ||
| 109 | 108 | ||
| 110 | &sub ("esp",16); | 109 | &sub ("esp",16); |
| 111 | &and ("esp",-64); | 110 | &and ("esp",-64); |
| @@ -225,8 +224,10 @@ sub BODY_00_15() { | |||
| 225 | 224 | ||
| 226 | &mov ("esp",&DWP(12,"esp")); # restore sp | 225 | &mov ("esp",&DWP(12,"esp")); # restore sp |
| 227 | &function_end_A(); | 226 | &function_end_A(); |
| 227 | &function_end_B("sha256_block_data_order"); | ||
| 228 | 228 | ||
| 229 | &set_label("K256",64); # Yes! I keep it in the code segment! | 229 | &rodataseg(); |
| 230 | &set_label("K256",64); | ||
| 230 | &data_word(0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5); | 231 | &data_word(0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5); |
| 231 | &data_word(0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5); | 232 | &data_word(0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5); |
| 232 | &data_word(0xd807aa98,0x12835b01,0x243185be,0x550c7dc3); | 233 | &data_word(0xd807aa98,0x12835b01,0x243185be,0x550c7dc3); |
| @@ -243,7 +244,6 @@ sub BODY_00_15() { | |||
| 243 | &data_word(0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3); | 244 | &data_word(0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3); |
| 244 | &data_word(0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208); | 245 | &data_word(0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208); |
| 245 | &data_word(0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2); | 246 | &data_word(0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2); |
| 246 | &function_end_B("sha256_block_data_order"); | 247 | &previous(); |
| 247 | &asciz("SHA256 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>"); | ||
| 248 | 248 | ||
| 249 | &asm_finish(); | 249 | &asm_finish(); |
diff --git a/src/lib/libcrypto/sha/asm/sha512-586.pl b/src/lib/libcrypto/sha/asm/sha512-586.pl index 163361ebe9..c1d0684e92 100644 --- a/src/lib/libcrypto/sha/asm/sha512-586.pl +++ b/src/lib/libcrypto/sha/asm/sha512-586.pl | |||
| @@ -261,16 +261,18 @@ sub BODY_00_15_x86 { | |||
| 261 | } | 261 | } |
| 262 | 262 | ||
| 263 | 263 | ||
| 264 | &static_label("K512"); | ||
| 264 | &function_begin("sha512_block_data_order"); | 265 | &function_begin("sha512_block_data_order"); |
| 265 | &mov ("esi",wparam(0)); # ctx | 266 | &mov ("esi",wparam(0)); # ctx |
| 266 | &mov ("edi",wparam(1)); # inp | 267 | &mov ("edi",wparam(1)); # inp |
| 267 | &mov ("eax",wparam(2)); # num | 268 | &mov ("eax",wparam(2)); # num |
| 268 | &mov ("ebx","esp"); # saved sp | 269 | &mov ("ebx","esp"); # saved sp |
| 269 | 270 | ||
| 270 | &call (&label("pic_point")); # make it PIC! | 271 | &picsetup($K512); |
| 271 | &set_label("pic_point"); | 272 | if ($sse2) { |
| 272 | &blindpop($K512); | 273 | &picsymbol("edx", "OPENSSL_ia32cap_P", $K512); |
| 273 | &lea ($K512,&DWP(&label("K512")."-".&label("pic_point"),$K512)); | 274 | } |
| 275 | &picsymbol($K512, &label("K512"), $K512); | ||
| 274 | 276 | ||
| 275 | &sub ("esp",16); | 277 | &sub ("esp",16); |
| 276 | &and ("esp",-64); | 278 | &and ("esp",-64); |
| @@ -283,7 +285,6 @@ sub BODY_00_15_x86 { | |||
| 283 | &mov (&DWP(12,"esp"),"ebx"); # saved sp | 285 | &mov (&DWP(12,"esp"),"ebx"); # saved sp |
| 284 | 286 | ||
| 285 | if ($sse2) { | 287 | if ($sse2) { |
| 286 | &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512")); | ||
| 287 | &bt (&DWP(0,"edx"),"\$IA32CAP_BIT0_SSE2"); | 288 | &bt (&DWP(0,"edx"),"\$IA32CAP_BIT0_SSE2"); |
| 288 | &jnc (&label("loop_x86")); | 289 | &jnc (&label("loop_x86")); |
| 289 | 290 | ||
| @@ -556,8 +557,10 @@ if ($sse2) { | |||
| 556 | 557 | ||
| 557 | &mov ("esp",&DWP(12,"esp")); # restore sp | 558 | &mov ("esp",&DWP(12,"esp")); # restore sp |
| 558 | &function_end_A(); | 559 | &function_end_A(); |
| 560 | &function_end_B("sha512_block_data_order"); | ||
| 559 | 561 | ||
| 560 | &set_label("K512",64); # Yes! I keep it in the code segment! | 562 | &rodataseg(); |
| 563 | &set_label("K512",64); | ||
| 561 | &data_word(0xd728ae22,0x428a2f98); # u64 | 564 | &data_word(0xd728ae22,0x428a2f98); # u64 |
| 562 | &data_word(0x23ef65cd,0x71374491); # u64 | 565 | &data_word(0x23ef65cd,0x71374491); # u64 |
| 563 | &data_word(0xec4d3b2f,0xb5c0fbcf); # u64 | 566 | &data_word(0xec4d3b2f,0xb5c0fbcf); # u64 |
| @@ -638,7 +641,6 @@ if ($sse2) { | |||
| 638 | &data_word(0xfc657e2a,0x597f299c); # u64 | 641 | &data_word(0xfc657e2a,0x597f299c); # u64 |
| 639 | &data_word(0x3ad6faec,0x5fcb6fab); # u64 | 642 | &data_word(0x3ad6faec,0x5fcb6fab); # u64 |
| 640 | &data_word(0x4a475817,0x6c44198c); # u64 | 643 | &data_word(0x4a475817,0x6c44198c); # u64 |
| 641 | &function_end_B("sha512_block_data_order"); | 644 | &previous(); |
| 642 | &asciz("SHA512 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>"); | ||
| 643 | 645 | ||
| 644 | &asm_finish(); | 646 | &asm_finish(); |
diff --git a/src/lib/libcrypto/whrlpool/asm/wp-mmx.pl b/src/lib/libcrypto/whrlpool/asm/wp-mmx.pl index 0ff8e5b612..a54d702c3f 100644 --- a/src/lib/libcrypto/whrlpool/asm/wp-mmx.pl +++ b/src/lib/libcrypto/whrlpool/asm/wp-mmx.pl | |||
| @@ -77,6 +77,8 @@ sub row() | |||
| 77 | $tbl="ebp"; | 77 | $tbl="ebp"; |
| 78 | @mm=("mm0","mm1","mm2","mm3","mm4","mm5","mm6","mm7"); | 78 | @mm=("mm0","mm1","mm2","mm3","mm4","mm5","mm6","mm7"); |
| 79 | 79 | ||
| 80 | &static_label("table"); | ||
| 81 | |||
| 80 | &function_begin_B("whirlpool_block_mmx"); | 82 | &function_begin_B("whirlpool_block_mmx"); |
| 81 | &push ("ebp"); | 83 | &push ("ebp"); |
| 82 | &push ("ebx"); | 84 | &push ("ebx"); |
| @@ -97,10 +99,8 @@ $tbl="ebp"; | |||
| 97 | &mov (&DWP(8,"ebx"),"ebp"); | 99 | &mov (&DWP(8,"ebx"),"ebp"); |
| 98 | &mov (&DWP(16,"ebx"),"eax"); # saved stack pointer | 100 | &mov (&DWP(16,"ebx"),"eax"); # saved stack pointer |
| 99 | 101 | ||
| 100 | &call (&label("pic_point")); | 102 | &picsetup($tbl); |
| 101 | &set_label("pic_point"); | 103 | &picsymbol($tbl, &label("table"), $tbl); |
| 102 | &blindpop($tbl); | ||
| 103 | &lea ($tbl,&DWP(&label("table")."-".&label("pic_point"),$tbl)); | ||
| 104 | 104 | ||
| 105 | &xor ("ecx","ecx"); | 105 | &xor ("ecx","ecx"); |
| 106 | &xor ("edx","edx"); | 106 | &xor ("edx","edx"); |
| @@ -218,7 +218,9 @@ for($i=0;$i<8;$i++) { | |||
| 218 | &pop ("ebx"); | 218 | &pop ("ebx"); |
| 219 | &pop ("ebp"); | 219 | &pop ("ebp"); |
| 220 | &ret (); | 220 | &ret (); |
| 221 | &function_end_B("whirlpool_block_mmx"); | ||
| 221 | 222 | ||
| 223 | &rodataseg(); | ||
| 222 | &align(64); | 224 | &align(64); |
| 223 | &set_label("table"); | 225 | &set_label("table"); |
| 224 | &LL(0x18,0x18,0x60,0x18,0xc0,0x78,0x30,0xd8); | 226 | &LL(0x18,0x18,0x60,0x18,0xc0,0x78,0x30,0xd8); |
| @@ -488,6 +490,6 @@ for($i=0;$i<8;$i++) { | |||
| 488 | &L(0xe4,0x27,0x41,0x8b,0xa7,0x7d,0x95,0xd8); | 490 | &L(0xe4,0x27,0x41,0x8b,0xa7,0x7d,0x95,0xd8); |
| 489 | &L(0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e); | 491 | &L(0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e); |
| 490 | &L(0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33); | 492 | &L(0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33); |
| 493 | &previous(); | ||
| 491 | 494 | ||
| 492 | &function_end_B("whirlpool_block_mmx"); | ||
| 493 | &asm_finish(); | 495 | &asm_finish(); |
