diff options
Diffstat (limited to 'src/lib/libcrypto/x86_64cpuid.pl')
-rw-r--r-- | src/lib/libcrypto/x86_64cpuid.pl | 36 |
1 file changed, 20 insertions, 16 deletions
diff --git a/src/lib/libcrypto/x86_64cpuid.pl b/src/lib/libcrypto/x86_64cpuid.pl
index b36d3f7dc5..6558dedb6b 100644
--- a/src/lib/libcrypto/x86_64cpuid.pl
+++ b/src/lib/libcrypto/x86_64cpuid.pl | |||
@@ -20,8 +20,8 @@ print<<___; | |||
20 | .section .init | 20 | .section .init |
21 | call OPENSSL_cpuid_setup | 21 | call OPENSSL_cpuid_setup |
22 | 22 | ||
23 | .extern OPENSSL_ia32cap_P | ||
23 | .hidden OPENSSL_ia32cap_P | 24 | .hidden OPENSSL_ia32cap_P |
24 | .comm OPENSSL_ia32cap_P,8,4 | ||
25 | 25 | ||
26 | .text | 26 | .text |
27 | 27 | ||
@@ -80,8 +80,8 @@ OPENSSL_ia32_cpuid: | |||
80 | mov %eax,%r10d | 80 | mov %eax,%r10d |
81 | mov \$0x80000001,%eax | 81 | mov \$0x80000001,%eax |
82 | cpuid | 82 | cpuid |
83 | or %ecx,%r9d | 83 | and \$IA32CAP_MASK1_AMD_XOP,%r9d # isolate AMD XOP bit |
84 | and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11 | 84 | or \$1,%r9d # make sure %r9d is not zero |
85 | 85 | ||
86 | cmp \$0x80000008,%r10d | 86 | cmp \$0x80000008,%r10d |
87 | jb .Lintel | 87 | jb .Lintel |
@@ -93,12 +93,12 @@ OPENSSL_ia32_cpuid: | |||
93 | 93 | ||
94 | mov \$1,%eax | 94 | mov \$1,%eax |
95 | cpuid | 95 | cpuid |
96 | bt \$28,%edx # test hyper-threading bit | 96 | bt \$IA32CAP_BIT0_HT,%edx # test hyper-threading bit |
97 | jnc .Lgeneric | 97 | jnc .Lgeneric |
98 | shr \$16,%ebx # number of logical processors | 98 | shr \$16,%ebx # number of logical processors |
99 | cmp %r10b,%bl | 99 | cmp %r10b,%bl |
100 | ja .Lgeneric | 100 | ja .Lgeneric |
101 | and \$0xefffffff,%edx # ~(1<<28) | 101 | xor \$IA32CAP_MASK0_HT,%edx |
102 | jmp .Lgeneric | 102 | jmp .Lgeneric |
103 | 103 | ||
104 | .Lintel: | 104 | .Lintel: |
@@ -116,33 +116,37 @@ OPENSSL_ia32_cpuid: | |||
116 | .Lnocacheinfo: | 116 | .Lnocacheinfo: |
117 | mov \$1,%eax | 117 | mov \$1,%eax |
118 | cpuid | 118 | cpuid |
119 | and \$0xbfefffff,%edx # force reserved bits to 0 | 119 | # force reserved bits to 0 |
120 | and \$(~(IA32CAP_MASK0_INTELP4 | IA32CAP_MASK0_INTEL)),%edx | ||
120 | cmp \$0,%r9d | 121 | cmp \$0,%r9d |
121 | jne .Lnotintel | 122 | jne .Lnotintel |
122 | or \$0x40000000,%edx # set reserved bit#30 on Intel CPUs | 123 | # set reserved bit#30 on Intel CPUs |
124 | or \$IA32CAP_MASK0_INTEL,%edx | ||
123 | and \$15,%ah | 125 | and \$15,%ah |
124 | cmp \$15,%ah # examine Family ID | 126 | cmp \$15,%ah # examine Family ID |
125 | jne .Lnotintel | 127 | jne .Lnotintel |
126 | or \$0x00100000,%edx # set reserved bit#20 to engage RC4_CHAR | 128 | # set reserved bit#20 to engage RC4_CHAR |
129 | or \$IA32CAP_MASK0_INTELP4,%edx | ||
127 | .Lnotintel: | 130 | .Lnotintel: |
128 | bt \$28,%edx # test hyper-threading bit | 131 | bt \$IA32CAP_BIT0_HT,%edx # test hyper-threading bit |
129 | jnc .Lgeneric | 132 | jnc .Lgeneric |
130 | and \$0xefffffff,%edx # ~(1<<28) | 133 | xor \$IA32CAP_MASK0_HT,%edx |
131 | cmp \$0,%r10d | 134 | cmp \$0,%r10d |
132 | je .Lgeneric | 135 | je .Lgeneric |
133 | 136 | ||
134 | or \$0x10000000,%edx # 1<<28 | 137 | or \$IA32CAP_MASK0_HT,%edx |
135 | shr \$16,%ebx | 138 | shr \$16,%ebx |
136 | cmp \$1,%bl # see if cache is shared | 139 | cmp \$1,%bl # see if cache is shared |
137 | ja .Lgeneric | 140 | ja .Lgeneric |
138 | and \$0xefffffff,%edx # ~(1<<28) | 141 | xor \$IA32CAP_MASK0_HT,%edx # clear hyper-threading bit if not |
142 | |||
139 | .Lgeneric: | 143 | .Lgeneric: |
140 | and \$0x00000800,%r9d # isolate AMD XOP flag | 144 | and \$IA32CAP_MASK1_AMD_XOP,%r9d # isolate AMD XOP flag |
141 | and \$0xfffff7ff,%ecx | 145 | and \$(~IA32CAP_MASK1_AMD_XOP),%ecx |
142 | or %ecx,%r9d # merge AMD XOP flag | 146 | or %ecx,%r9d # merge AMD XOP flag |
143 | 147 | ||
144 | mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx | 148 | mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx |
145 | bt \$27,%r9d # check OSXSAVE bit | 149 | bt \$IA32CAP_BIT1_OSXSAVE,%r9d # check OSXSAVE bit |
146 | jnc .Lclear_avx | 150 | jnc .Lclear_avx |
147 | xor %ecx,%ecx # XCR0 | 151 | xor %ecx,%ecx # XCR0 |
148 | .byte 0x0f,0x01,0xd0 # xgetbv | 152 | .byte 0x0f,0x01,0xd0 # xgetbv |
@@ -150,7 +154,7 @@ OPENSSL_ia32_cpuid: | |||
150 | cmp \$6,%eax | 154 | cmp \$6,%eax |
151 | je .Ldone | 155 | je .Ldone |
152 | .Lclear_avx: | 156 | .Lclear_avx: |
153 | mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11) | 157 | mov \$(~(IA32CAP_MASK1_AVX | IA32CAP_MASK1_FMA3 | IA32CAP_MASK1_AMD_XOP)),%eax |
154 | and %eax,%r9d # clear AVX, FMA and AMD XOP bits | 158 | and %eax,%r9d # clear AVX, FMA and AMD XOP bits |
155 | .Ldone: | 159 | .Ldone: |
156 | shl \$32,%r9 | 160 | shl \$32,%r9 |