diff options
Diffstat (limited to 'src/lib/libcrypto/x86_64cpuid.pl')
-rw-r--r-- | src/lib/libcrypto/x86_64cpuid.pl | 87 |
1 files changed, 69 insertions, 18 deletions
diff --git a/src/lib/libcrypto/x86_64cpuid.pl b/src/lib/libcrypto/x86_64cpuid.pl index b771a8539d..58e9bedcf0 100644 --- a/src/lib/libcrypto/x86_64cpuid.pl +++ b/src/lib/libcrypto/x86_64cpuid.pl | |||
@@ -7,16 +7,25 @@ if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } | |||
7 | $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); | 7 | $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); |
8 | 8 | ||
9 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; | 9 | $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; |
# Locate the x86_64-xlate.pl translator script: look next to this script
# first, then in the perlasm/ subdirectory; abort if neither file exists.
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator,
# which is run under the current perl interpreter ($^X).  Check the result of
# open so that a failure to start the pipe aborts the script instead of
# silently producing no output.
open STDOUT,"| $^X $xlate $flavour $output"
	or die "can't call $xlate: $!";

# Register names used for the first four arguments, chosen by target flavour.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order
14 | print<<___; | 19 | print<<___; |
15 | #include <machine/asm.h> | 20 | #include <machine/asm.h> |
16 | .extern OPENSSL_cpuid_setup | 21 | .extern OPENSSL_cpuid_setup |
22 | .hidden OPENSSL_cpuid_setup | ||
17 | .section .init | 23 | .section .init |
18 | call PIC_PLT(OPENSSL_cpuid_setup) | 24 | call PIC_PLT(OPENSSL_cpuid_setup) |
19 | 25 | ||
26 | .hidden OPENSSL_ia32cap_P | ||
27 | .comm OPENSSL_ia32cap_P,8,4 | ||
28 | |||
20 | .text | 29 | .text |
21 | 30 | ||
22 | .globl OPENSSL_atomic_add | 31 | .globl OPENSSL_atomic_add |
@@ -47,7 +56,7 @@ OPENSSL_rdtsc: | |||
47 | .type OPENSSL_ia32_cpuid,\@abi-omnipotent | 56 | .type OPENSSL_ia32_cpuid,\@abi-omnipotent |
48 | .align 16 | 57 | .align 16 |
49 | OPENSSL_ia32_cpuid: | 58 | OPENSSL_ia32_cpuid: |
50 | mov %rbx,%r8 | 59 | mov %rbx,%r8 # save %rbx |
51 | 60 | ||
52 | xor %eax,%eax | 61 | xor %eax,%eax |
53 | cpuid | 62 | cpuid |
@@ -79,7 +88,15 @@ OPENSSL_ia32_cpuid: | |||
79 | # AMD specific | 88 | # AMD specific |
80 | mov \$0x80000000,%eax | 89 | mov \$0x80000000,%eax |
81 | cpuid | 90 | cpuid |
82 | cmp \$0x80000008,%eax | 91 | cmp \$0x80000001,%eax |
92 | jb .Lintel | ||
93 | mov %eax,%r10d | ||
94 | mov \$0x80000001,%eax | ||
95 | cpuid | ||
96 | or %ecx,%r9d | ||
97 | and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11 | ||
98 | |||
99 | cmp \$0x80000008,%r10d | ||
83 | jb .Lintel | 100 | jb .Lintel |
84 | 101 | ||
85 | mov \$0x80000008,%eax | 102 | mov \$0x80000008,%eax |
@@ -90,12 +107,12 @@ OPENSSL_ia32_cpuid: | |||
90 | mov \$1,%eax | 107 | mov \$1,%eax |
91 | cpuid | 108 | cpuid |
92 | bt \$28,%edx # test hyper-threading bit | 109 | bt \$28,%edx # test hyper-threading bit |
93 | jnc .Ldone | 110 | jnc .Lgeneric |
94 | shr \$16,%ebx # number of logical processors | 111 | shr \$16,%ebx # number of logical processors |
95 | cmp %r10b,%bl | 112 | cmp %r10b,%bl |
96 | ja .Ldone | 113 | ja .Lgeneric |
97 | and \$0xefffffff,%edx # ~(1<<28) | 114 | and \$0xefffffff,%edx # ~(1<<28) |
98 | jmp .Ldone | 115 | jmp .Lgeneric |
99 | 116 | ||
100 | .Lintel: | 117 | .Lintel: |
101 | cmp \$4,%r11d | 118 | cmp \$4,%r11d |
@@ -112,30 +129,47 @@ OPENSSL_ia32_cpuid: | |||
112 | .Lnocacheinfo: | 129 | .Lnocacheinfo: |
113 | mov \$1,%eax | 130 | mov \$1,%eax |
114 | cpuid | 131 | cpuid |
132 | and \$0xbfefffff,%edx # force reserved bits to 0 | ||
115 | cmp \$0,%r9d | 133 | cmp \$0,%r9d |
116 | jne .Lnotintel | 134 | jne .Lnotintel |
117 | or \$0x00100000,%edx # use reserved 20th bit to engage RC4_CHAR | 135 | or \$0x40000000,%edx # set reserved bit#30 on Intel CPUs |
118 | and \$15,%ah | 136 | and \$15,%ah |
119 | cmp \$15,%ah # examine Family ID | 137 | cmp \$15,%ah # examine Family ID |
120 | je .Lnotintel | 138 | jne .Lnotintel |
121 | or \$0x40000000,%edx # use reserved bit to skip unrolled loop | 139 | or \$0x00100000,%edx # set reserved bit#20 to engage RC4_CHAR |
122 | .Lnotintel: | 140 | .Lnotintel: |
123 | bt \$28,%edx # test hyper-threading bit | 141 | bt \$28,%edx # test hyper-threading bit |
124 | jnc .Ldone | 142 | jnc .Lgeneric |
125 | and \$0xefffffff,%edx # ~(1<<28) | 143 | and \$0xefffffff,%edx # ~(1<<28) |
126 | cmp \$0,%r10d | 144 | cmp \$0,%r10d |
127 | je .Ldone | 145 | je .Lgeneric |
128 | 146 | ||
129 | or \$0x10000000,%edx # 1<<28 | 147 | or \$0x10000000,%edx # 1<<28 |
130 | shr \$16,%ebx | 148 | shr \$16,%ebx |
131 | cmp \$1,%bl # see if cache is shared | 149 | cmp \$1,%bl # see if cache is shared |
132 | ja .Ldone | 150 | ja .Lgeneric |
133 | and \$0xefffffff,%edx # ~(1<<28) | 151 | and \$0xefffffff,%edx # ~(1<<28) |
152 | .Lgeneric: | ||
153 | and \$0x00000800,%r9d # isolate AMD XOP flag | ||
154 | and \$0xfffff7ff,%ecx | ||
155 | or %ecx,%r9d # merge AMD XOP flag | ||
156 | |||
157 | mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx | ||
158 | bt \$27,%r9d # check OSXSAVE bit | ||
159 | jnc .Lclear_avx | ||
160 | xor %ecx,%ecx # XCR0 | ||
161 | .byte 0x0f,0x01,0xd0 # xgetbv | ||
162 | and \$6,%eax # isolate XMM and YMM state support | ||
163 | cmp \$6,%eax | ||
164 | je .Ldone | ||
165 | .Lclear_avx: | ||
166 | mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11) | ||
167 | and %eax,%r9d # clear AVX, FMA and AMD XOP bits | ||
134 | .Ldone: | 168 | .Ldone: |
135 | shl \$32,%rcx | 169 | shl \$32,%r9 |
136 | mov %edx,%eax | 170 | mov %r10d,%eax |
137 | mov %r8,%rbx | 171 | mov %r8,%rbx # restore %rbx |
138 | or %rcx,%rax | 172 | or %r9,%rax |
139 | ret | 173 | ret |
140 | .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid | 174 | .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid |
141 | 175 | ||
@@ -230,4 +264,21 @@ OPENSSL_wipe_cpu: | |||
230 | .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu | 264 | .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu |
231 | ___ | 265 | ___ |
232 | 266 | ||
# OPENSSL_ia32_rdrand: emit the assembly for a routine that returns a value
# obtained from the rdrand instruction in %rax.
#
# The emitted routine loads a counter of 8 into %ecx and executes rdrand up to
# eight times: "jc" leaves the loop as soon as the carry flag is set, otherwise
# "loop" decrements the counter and retries until it reaches zero.
# NOTE(review): per the Intel instruction reference, rdrand sets the carry flag
# when it delivers a value and zeroes its destination otherwise -- confirm.
#
# After the loop, if %rax is zero it is replaced with the remaining counter
# from %rcx (cmp/cmove).  Under the assumption above, a successful draw that
# happened to be zero is reported as the non-zero counter, and a return value
# of zero means every attempt failed.
267 | print<<___; | ||
268 | .globl OPENSSL_ia32_rdrand | ||
269 | .type OPENSSL_ia32_rdrand,\@abi-omnipotent | ||
270 | .align 16 | ||
271 | OPENSSL_ia32_rdrand: | ||
272 | mov \$8,%ecx | ||
273 | .Loop_rdrand: | ||
274 | rdrand %rax | ||
275 | jc .Lbreak_rdrand | ||
276 | loop .Loop_rdrand | ||
277 | .Lbreak_rdrand: | ||
278 | cmp \$0,%rax | ||
279 | cmove %rcx,%rax | ||
280 | ret | ||
281 | .size OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand | ||
282 | ___ | ||
283 | |||
233 | close STDOUT; # flush | 284 | close STDOUT; # flush |