diff options
| author | miod <> | 2016-11-04 17:30:30 +0000 |
|---|---|---|
| committer | miod <> | 2016-11-04 17:30:30 +0000 |
| commit | 1a12fc8399638223feca8f853e2ac2cc22eeb471 (patch) | |
| tree | 77b413175d422148cfb0ef7b2062340230aa5413 /src/lib/libcrypto/x86cpuid.pl | |
| parent | 78e68d71838891e44ddbb5238203ccfce3b62d80 (diff) | |
| download | openbsd-1a12fc8399638223feca8f853e2ac2cc22eeb471.tar.gz openbsd-1a12fc8399638223feca8f853e2ac2cc22eeb471.tar.bz2 openbsd-1a12fc8399638223feca8f853e2ac2cc22eeb471.zip | |
Replace all uses of magic numbers when operating on OPENSSL_ia32cap_P[] with
meaningful constants in a private header file, so that reviewers can actually
get a chance to figure out what the code is attempting to do without knowing
all cpuid bits.
While there, turn it from an array of two 32-bit ints into a properly aligned
64-bit int.
Use of OPENSSL_ia32cap_P is now restricted to the assembler parts. C code will
now always use OPENSSL_cpu_caps() and check for the proper bits in the
whole 64-bit word it returns.
i386 tests and ok jsing@
Diffstat (limited to 'src/lib/libcrypto/x86cpuid.pl')
| -rw-r--r-- | src/lib/libcrypto/x86cpuid.pl | 56 |
1 files changed, 33 insertions, 23 deletions
diff --git a/src/lib/libcrypto/x86cpuid.pl b/src/lib/libcrypto/x86cpuid.pl index 7918629f64..8b9570fc72 100644 --- a/src/lib/libcrypto/x86cpuid.pl +++ b/src/lib/libcrypto/x86cpuid.pl | |||
| @@ -56,8 +56,10 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | |||
| 56 | &mov ("esi","eax"); | 56 | &mov ("esi","eax"); |
| 57 | &mov ("eax",0x80000001); | 57 | &mov ("eax",0x80000001); |
| 58 | &cpuid (); | 58 | &cpuid (); |
| 59 | &or ("ebp","ecx"); | 59 | &and ("ecx","\$IA32CAP_MASK1_AMD_XOP"); # isolate AMD XOP bit |
| 60 | &and ("ebp",1<<11|1); # isolate XOP bit | 60 | &or ("ecx",1); # make sure ecx is not zero |
| 61 | &mov ("ebp","ecx"); | ||
| 62 | |||
| 61 | &cmp ("esi",0x80000008); | 63 | &cmp ("esi",0x80000008); |
| 62 | &jb (&label("intel")); | 64 | &jb (&label("intel")); |
| 63 | 65 | ||
| @@ -69,13 +71,13 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | |||
| 69 | &mov ("eax",1); | 71 | &mov ("eax",1); |
| 70 | &xor ("ecx","ecx"); | 72 | &xor ("ecx","ecx"); |
| 71 | &cpuid (); | 73 | &cpuid (); |
| 72 | &bt ("edx",28); | 74 | &bt ("edx","\$IA32CAP_BIT0_HT"); |
| 73 | &jnc (&label("generic")); | 75 | &jnc (&label("generic")); |
| 74 | &shr ("ebx",16); | 76 | &shr ("ebx",16); |
| 75 | &and ("ebx",0xff); | 77 | &and ("ebx",0xff); |
| 76 | &cmp ("ebx","esi"); | 78 | &cmp ("ebx","esi"); |
| 77 | &ja (&label("generic")); | 79 | &ja (&label("generic")); |
| 78 | &and ("edx",0xefffffff); # clear hyper-threading bit | 80 | &xor ("edx","\$IA32CAP_MASK0_HT"); # clear hyper-threading bit |
| 79 | &jmp (&label("generic")); | 81 | &jmp (&label("generic")); |
| 80 | 82 | ||
| 81 | &set_label("intel"); | 83 | &set_label("intel"); |
| @@ -94,34 +96,38 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | |||
| 94 | &mov ("eax",1); | 96 | &mov ("eax",1); |
| 95 | &xor ("ecx","ecx"); | 97 | &xor ("ecx","ecx"); |
| 96 | &cpuid (); | 98 | &cpuid (); |
| 97 | &and ("edx",0xbfefffff); # force reserved bits #20, #30 to 0 | 99 | # force reserved bits to 0. |
| 100 | &and ("edx","\$~(IA32CAP_MASK0_INTELP4 | IA32CAP_MASK0_INTEL)"); | ||
| 98 | &cmp ("ebp",0); | 101 | &cmp ("ebp",0); |
| 99 | &jne (&label("notintel")); | 102 | &jne (&label("notintel")); |
| 100 | &or ("edx",1<<30); # set reserved bit#30 on Intel CPUs | 103 | # set reserved bit#30 on Intel CPUs |
| 101 | &and (&HB("eax"),15); # familiy ID | 104 | &or ("edx","\$IA32CAP_MASK0_INTEL"); |
| 105 | &and (&HB("eax"),15); # family ID | ||
| 102 | &cmp (&HB("eax"),15); # P4? | 106 | &cmp (&HB("eax"),15); # P4? |
| 103 | &jne (&label("notintel")); | 107 | &jne (&label("notintel")); |
| 104 | &or ("edx",1<<20); # set reserved bit#20 to engage RC4_CHAR | 108 | # set reserved bit#20 to engage RC4_CHAR |
| 109 | &or ("edx","\$IA32CAP_MASK0_INTELP4"); | ||
| 105 | &set_label("notintel"); | 110 | &set_label("notintel"); |
| 106 | &bt ("edx",28); # test hyper-threading bit | 111 | &bt ("edx","\$IA32CAP_BIT0_HT"); # test hyper-threading bit |
| 107 | &jnc (&label("generic")); | 112 | &jnc (&label("generic")); |
| 108 | &and ("edx",0xefffffff); | 113 | &xor ("edx","\$IA32CAP_MASK0_HT"); |
| 109 | &cmp ("edi",0); | 114 | &cmp ("edi",0); |
| 110 | &je (&label("generic")); | 115 | &je (&label("generic")); |
| 111 | 116 | ||
| 112 | &or ("edx",0x10000000); | 117 | &or ("edx","\$IA32CAP_MASK0_HT"); |
| 113 | &shr ("ebx",16); | 118 | &shr ("ebx",16); |
| 114 | &cmp (&LB("ebx"),1); | 119 | &cmp (&LB("ebx"),1); # see if cache is shared |
| 115 | &ja (&label("generic")); | 120 | &ja (&label("generic")); |
| 116 | &and ("edx",0xefffffff); # clear hyper-threading bit if not | 121 | &xor ("edx","\$IA32CAP_MASK0_HT"); # clear hyper-threading bit if not |
| 117 | 122 | ||
| 118 | &set_label("generic"); | 123 | &set_label("generic"); |
| 119 | &and ("ebp",1<<11); # isolate AMD XOP flag | 124 | &and ("ebp","\$IA32CAP_MASK1_AMD_XOP"); # isolate AMD XOP flag |
| 120 | &and ("ecx",0xfffff7ff); # force 11th bit to 0 | 125 | # force reserved bits to 0. |
| 126 | &and ("ecx","\$~IA32CAP_MASK1_AMD_XOP"); | ||
| 121 | &mov ("esi","edx"); | 127 | &mov ("esi","edx"); |
| 122 | &or ("ebp","ecx"); # merge AMD XOP flag | 128 | &or ("ebp","ecx"); # merge AMD XOP flag |
| 123 | 129 | ||
| 124 | &bt ("ecx",27); # check OSXSAVE bit | 130 | &bt ("ecx","\$IA32CAP_BIT1_OSXSAVE"); # check OSXSAVE bit |
| 125 | &jnc (&label("clear_avx")); | 131 | &jnc (&label("clear_avx")); |
| 126 | &xor ("ecx","ecx"); | 132 | &xor ("ecx","ecx"); |
| 127 | &data_byte(0x0f,0x01,0xd0); # xgetbv | 133 | &data_byte(0x0f,0x01,0xd0); # xgetbv |
| @@ -131,10 +137,13 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | |||
| 131 | &cmp ("eax",2); | 137 | &cmp ("eax",2); |
| 132 | &je (&label("clear_avx")); | 138 | &je (&label("clear_avx")); |
| 133 | &set_label("clear_xmm"); | 139 | &set_label("clear_xmm"); |
| 134 | &and ("ebp",0xfdfffffd); # clear AESNI and PCLMULQDQ bits | 140 | # clear AESNI and PCLMULQDQ bits. |
| 135 | &and ("esi",0xfeffffff); # clear FXSR | 141 | &and ("ebp","\$~(IA32CAP_MASK1_AESNI | IA32CAP_MASK1_PCLMUL)"); |
| 142 | # clear FXSR. | ||
| 143 | &and ("esi","\$~IA32CAP_MASK0_FXSR"); | ||
| 136 | &set_label("clear_avx"); | 144 | &set_label("clear_avx"); |
| 137 | &and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits | 145 | # clear AVX, FMA3 and AMD XOP bits. |
| 146 | &and ("ebp","\$~(IA32CAP_MASK1_AVX | IA32CAP_MASK1_FMA3 | IA32CAP_MASK1_AMD_XOP)"); | ||
| 138 | &set_label("done"); | 147 | &set_label("done"); |
| 139 | &mov ("eax","esi"); | 148 | &mov ("eax","esi"); |
| 140 | &mov ("edx","ebp"); | 149 | &mov ("edx","ebp"); |
| @@ -143,16 +152,17 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } | |||
| 143 | 152 | ||
| 144 | &external_label("OPENSSL_ia32cap_P"); | 153 | &external_label("OPENSSL_ia32cap_P"); |
| 145 | 154 | ||
| 146 | &function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD"); | 155 | &function_begin_B("OPENSSL_wipe_cpu",""); |
| 147 | &xor ("eax","eax"); | 156 | &xor ("eax","eax"); |
| 148 | &xor ("edx","edx"); | 157 | &xor ("edx","edx"); |
| 149 | &picmeup("ecx","OPENSSL_ia32cap_P"); | 158 | &picmeup("ecx","OPENSSL_ia32cap_P"); |
| 150 | &mov ("ecx",&DWP(0,"ecx")); | 159 | &mov ("ecx",&DWP(0,"ecx")); |
| 151 | &bt (&DWP(0,"ecx"),0); | 160 | &bt (&DWP(0,"ecx"),"\$IA32CAP_BIT0_FPU"); |
| 152 | &jnc (&label("no_x87")); | 161 | &jnc (&label("no_x87")); |
| 153 | if ($sse2) { | 162 | if ($sse2) { |
| 154 | &and ("ecx",1<<26|1<<24); # check SSE2 and FXSR bits | 163 | # Check SSE2 and FXSR bits. |
| 155 | &cmp ("ecx",1<<26|1<<24); | 164 | &and ("ecx", "\$(IA32CAP_MASK0_FXSR | IA32CAP_MASK0_SSE2)"); |
| 165 | &cmp ("ecx", "\$(IA32CAP_MASK0_FXSR | IA32CAP_MASK0_SSE2)"); | ||
| 156 | &jne (&label("no_sse2")); | 166 | &jne (&label("no_sse2")); |
| 157 | &pxor ("xmm0","xmm0"); | 167 | &pxor ("xmm0","xmm0"); |
| 158 | &pxor ("xmm1","xmm1"); | 168 | &pxor ("xmm1","xmm1"); |
