diff options
Diffstat (limited to '')
-rw-r--r-- | src/lib/libcrypto/x86_64cpuid.pl | 87 |
1 files changed, 69 insertions, 18 deletions
diff --git a/src/lib/libcrypto/x86_64cpuid.pl b/src/lib/libcrypto/x86_64cpuid.pl index c96821a3c8..7b7b93b223 100644 --- a/src/lib/libcrypto/x86_64cpuid.pl +++ b/src/lib/libcrypto/x86_64cpuid.pl | |||
@@ -7,15 +7,24 @@ if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } | |||
7 | $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); | 7 | $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); |
8 | 8 | ||
9 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | 9 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; |
10 | open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output"; | 10 | ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or |
11 | ( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or | ||
12 | die "can't locate x86_64-xlate.pl"; | ||
13 | |||
14 | open STDOUT,"| $^X $xlate $flavour $output"; | ||
15 | |||
16 | ($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order | ||
17 | ("%rdi","%rsi","%rdx","%rcx"); # Unix order | ||
11 | 18 | ||
12 | if ($win64) { $arg1="%rcx"; $arg2="%rdx"; } | ||
13 | else { $arg1="%rdi"; $arg2="%rsi"; } | ||
14 | print<<___; | 19 | print<<___; |
15 | .extern OPENSSL_cpuid_setup | 20 | .extern OPENSSL_cpuid_setup |
21 | .hidden OPENSSL_cpuid_setup | ||
16 | .section .init | 22 | .section .init |
17 | call OPENSSL_cpuid_setup | 23 | call OPENSSL_cpuid_setup |
18 | 24 | ||
25 | .hidden OPENSSL_ia32cap_P | ||
26 | .comm OPENSSL_ia32cap_P,8,4 | ||
27 | |||
19 | .text | 28 | .text |
20 | 29 | ||
21 | .globl OPENSSL_atomic_add | 30 | .globl OPENSSL_atomic_add |
@@ -46,7 +55,7 @@ OPENSSL_rdtsc: | |||
46 | .type OPENSSL_ia32_cpuid,\@abi-omnipotent | 55 | .type OPENSSL_ia32_cpuid,\@abi-omnipotent |
47 | .align 16 | 56 | .align 16 |
48 | OPENSSL_ia32_cpuid: | 57 | OPENSSL_ia32_cpuid: |
49 | mov %rbx,%r8 | 58 | mov %rbx,%r8 # save %rbx |
50 | 59 | ||
51 | xor %eax,%eax | 60 | xor %eax,%eax |
52 | cpuid | 61 | cpuid |
@@ -78,7 +87,15 @@ OPENSSL_ia32_cpuid: | |||
78 | # AMD specific | 87 | # AMD specific |
79 | mov \$0x80000000,%eax | 88 | mov \$0x80000000,%eax |
80 | cpuid | 89 | cpuid |
81 | cmp \$0x80000008,%eax | 90 | cmp \$0x80000001,%eax |
91 | jb .Lintel | ||
92 | mov %eax,%r10d | ||
93 | mov \$0x80000001,%eax | ||
94 | cpuid | ||
95 | or %ecx,%r9d | ||
96 | and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11 | ||
97 | |||
98 | cmp \$0x80000008,%r10d | ||
82 | jb .Lintel | 99 | jb .Lintel |
83 | 100 | ||
84 | mov \$0x80000008,%eax | 101 | mov \$0x80000008,%eax |
@@ -89,12 +106,12 @@ OPENSSL_ia32_cpuid: | |||
89 | mov \$1,%eax | 106 | mov \$1,%eax |
90 | cpuid | 107 | cpuid |
91 | bt \$28,%edx # test hyper-threading bit | 108 | bt \$28,%edx # test hyper-threading bit |
92 | jnc .Ldone | 109 | jnc .Lgeneric |
93 | shr \$16,%ebx # number of logical processors | 110 | shr \$16,%ebx # number of logical processors |
94 | cmp %r10b,%bl | 111 | cmp %r10b,%bl |
95 | ja .Ldone | 112 | ja .Lgeneric |
96 | and \$0xefffffff,%edx # ~(1<<28) | 113 | and \$0xefffffff,%edx # ~(1<<28) |
97 | jmp .Ldone | 114 | jmp .Lgeneric |
98 | 115 | ||
99 | .Lintel: | 116 | .Lintel: |
100 | cmp \$4,%r11d | 117 | cmp \$4,%r11d |
@@ -111,30 +128,47 @@ OPENSSL_ia32_cpuid: | |||
111 | .Lnocacheinfo: | 128 | .Lnocacheinfo: |
112 | mov \$1,%eax | 129 | mov \$1,%eax |
113 | cpuid | 130 | cpuid |
131 | and \$0xbfefffff,%edx # force reserved bits to 0 | ||
114 | cmp \$0,%r9d | 132 | cmp \$0,%r9d |
115 | jne .Lnotintel | 133 | jne .Lnotintel |
116 | or \$0x00100000,%edx # use reserved 20th bit to engage RC4_CHAR | 134 | or \$0x40000000,%edx # set reserved bit#30 on Intel CPUs |
117 | and \$15,%ah | 135 | and \$15,%ah |
118 | cmp \$15,%ah # examine Family ID | 136 | cmp \$15,%ah # examine Family ID |
119 | je .Lnotintel | 137 | jne .Lnotintel |
120 | or \$0x40000000,%edx # use reserved bit to skip unrolled loop | 138 | or \$0x00100000,%edx # set reserved bit#20 to engage RC4_CHAR |
121 | .Lnotintel: | 139 | .Lnotintel: |
122 | bt \$28,%edx # test hyper-threading bit | 140 | bt \$28,%edx # test hyper-threading bit |
123 | jnc .Ldone | 141 | jnc .Lgeneric |
124 | and \$0xefffffff,%edx # ~(1<<28) | 142 | and \$0xefffffff,%edx # ~(1<<28) |
125 | cmp \$0,%r10d | 143 | cmp \$0,%r10d |
126 | je .Ldone | 144 | je .Lgeneric |
127 | 145 | ||
128 | or \$0x10000000,%edx # 1<<28 | 146 | or \$0x10000000,%edx # 1<<28 |
129 | shr \$16,%ebx | 147 | shr \$16,%ebx |
130 | cmp \$1,%bl # see if cache is shared | 148 | cmp \$1,%bl # see if cache is shared |
131 | ja .Ldone | 149 | ja .Lgeneric |
132 | and \$0xefffffff,%edx # ~(1<<28) | 150 | and \$0xefffffff,%edx # ~(1<<28) |
151 | .Lgeneric: | ||
152 | and \$0x00000800,%r9d # isolate AMD XOP flag | ||
153 | and \$0xfffff7ff,%ecx | ||
154 | or %ecx,%r9d # merge AMD XOP flag | ||
155 | |||
156 | mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx | ||
157 | bt \$27,%r9d # check OSXSAVE bit | ||
158 | jnc .Lclear_avx | ||
159 | xor %ecx,%ecx # XCR0 | ||
160 | .byte 0x0f,0x01,0xd0 # xgetbv | ||
161 | and \$6,%eax # isolate XMM and YMM state support | ||
162 | cmp \$6,%eax | ||
163 | je .Ldone | ||
164 | .Lclear_avx: | ||
165 | mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11) | ||
166 | and %eax,%r9d # clear AVX, FMA and AMD XOP bits | ||
133 | .Ldone: | 167 | .Ldone: |
134 | shl \$32,%rcx | 168 | shl \$32,%r9 |
135 | mov %edx,%eax | 169 | mov %r10d,%eax |
136 | mov %r8,%rbx | 170 | mov %r8,%rbx # restore %rbx |
137 | or %rcx,%rax | 171 | or %r9,%rax |
138 | ret | 172 | ret |
139 | .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid | 173 | .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid |
140 | 174 | ||
@@ -229,4 +263,21 @@ OPENSSL_wipe_cpu: | |||
229 | .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu | 263 | .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu |
230 | ___ | 264 | ___ |
231 | 265 | ||
266 | print<<___; | ||
267 | .globl OPENSSL_ia32_rdrand | ||
268 | .type OPENSSL_ia32_rdrand,\@abi-omnipotent | ||
269 | .align 16 | ||
270 | OPENSSL_ia32_rdrand: | ||
271 | mov \$8,%ecx | ||
272 | .Loop_rdrand: | ||
273 | rdrand %rax | ||
274 | jc .Lbreak_rdrand | ||
275 | loop .Loop_rdrand | ||
276 | .Lbreak_rdrand: | ||
277 | cmp \$0,%rax | ||
278 | cmove %rcx,%rax | ||
279 | ret | ||
280 | .size OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand | ||
281 | ___ | ||
282 | |||
232 | close STDOUT; # flush | 283 | close STDOUT; # flush |