summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/x86_64cpuid.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/x86_64cpuid.pl')
-rw-r--r--src/lib/libcrypto/x86_64cpuid.pl87
1 files changed, 69 insertions, 18 deletions
diff --git a/src/lib/libcrypto/x86_64cpuid.pl b/src/lib/libcrypto/x86_64cpuid.pl
index b771a8539d..58e9bedcf0 100644
--- a/src/lib/libcrypto/x86_64cpuid.pl
+++ b/src/lib/libcrypto/x86_64cpuid.pl
@@ -7,16 +7,25 @@ if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
7$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); 7$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
8 8
9$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; 9$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
10open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output"; 10( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
11( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
12die "can't locate x86_64-xlate.pl";
13
14open STDOUT,"| $^X $xlate $flavour $output";
15
16($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
17 ("%rdi","%rsi","%rdx","%rcx"); # Unix order
11 18
12if ($win64) { $arg1="%rcx"; $arg2="%rdx"; }
13else { $arg1="%rdi"; $arg2="%rsi"; }
14print<<___; 19print<<___;
15#include <machine/asm.h> 20#include <machine/asm.h>
16.extern OPENSSL_cpuid_setup 21.extern OPENSSL_cpuid_setup
22.hidden OPENSSL_cpuid_setup
17.section .init 23.section .init
18 call PIC_PLT(OPENSSL_cpuid_setup) 24 call PIC_PLT(OPENSSL_cpuid_setup)
19 25
26.hidden OPENSSL_ia32cap_P
27.comm OPENSSL_ia32cap_P,8,4
28
20.text 29.text
21 30
22.globl OPENSSL_atomic_add 31.globl OPENSSL_atomic_add
@@ -47,7 +56,7 @@ OPENSSL_rdtsc:
47.type OPENSSL_ia32_cpuid,\@abi-omnipotent 56.type OPENSSL_ia32_cpuid,\@abi-omnipotent
48.align 16 57.align 16
49OPENSSL_ia32_cpuid: 58OPENSSL_ia32_cpuid:
50 mov %rbx,%r8 59 mov %rbx,%r8 # save %rbx
51 60
52 xor %eax,%eax 61 xor %eax,%eax
53 cpuid 62 cpuid
@@ -79,7 +88,15 @@ OPENSSL_ia32_cpuid:
79 # AMD specific 88 # AMD specific
80 mov \$0x80000000,%eax 89 mov \$0x80000000,%eax
81 cpuid 90 cpuid
82 cmp \$0x80000008,%eax 91 cmp \$0x80000001,%eax
92 jb .Lintel
93 mov %eax,%r10d
94 mov \$0x80000001,%eax
95 cpuid
96 or %ecx,%r9d
97 and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11
98
99 cmp \$0x80000008,%r10d
83 jb .Lintel 100 jb .Lintel
84 101
85 mov \$0x80000008,%eax 102 mov \$0x80000008,%eax
@@ -90,12 +107,12 @@ OPENSSL_ia32_cpuid:
90 mov \$1,%eax 107 mov \$1,%eax
91 cpuid 108 cpuid
92 bt \$28,%edx # test hyper-threading bit 109 bt \$28,%edx # test hyper-threading bit
93 jnc .Ldone 110 jnc .Lgeneric
94 shr \$16,%ebx # number of logical processors 111 shr \$16,%ebx # number of logical processors
95 cmp %r10b,%bl 112 cmp %r10b,%bl
96 ja .Ldone 113 ja .Lgeneric
97 and \$0xefffffff,%edx # ~(1<<28) 114 and \$0xefffffff,%edx # ~(1<<28)
98 jmp .Ldone 115 jmp .Lgeneric
99 116
100.Lintel: 117.Lintel:
101 cmp \$4,%r11d 118 cmp \$4,%r11d
@@ -112,30 +129,47 @@ OPENSSL_ia32_cpuid:
112.Lnocacheinfo: 129.Lnocacheinfo:
113 mov \$1,%eax 130 mov \$1,%eax
114 cpuid 131 cpuid
132 and \$0xbfefffff,%edx # force reserved bits to 0
115 cmp \$0,%r9d 133 cmp \$0,%r9d
116 jne .Lnotintel 134 jne .Lnotintel
117 or \$0x00100000,%edx # use reserved 20th bit to engage RC4_CHAR 135 or \$0x40000000,%edx # set reserved bit#30 on Intel CPUs
118 and \$15,%ah 136 and \$15,%ah
119 cmp \$15,%ah # examine Family ID 137 cmp \$15,%ah # examine Family ID
120 je .Lnotintel 138 jne .Lnotintel
121 or \$0x40000000,%edx # use reserved bit to skip unrolled loop 139 or \$0x00100000,%edx # set reserved bit#20 to engage RC4_CHAR
122.Lnotintel: 140.Lnotintel:
123 bt \$28,%edx # test hyper-threading bit 141 bt \$28,%edx # test hyper-threading bit
124 jnc .Ldone 142 jnc .Lgeneric
125 and \$0xefffffff,%edx # ~(1<<28) 143 and \$0xefffffff,%edx # ~(1<<28)
126 cmp \$0,%r10d 144 cmp \$0,%r10d
127 je .Ldone 145 je .Lgeneric
128 146
129 or \$0x10000000,%edx # 1<<28 147 or \$0x10000000,%edx # 1<<28
130 shr \$16,%ebx 148 shr \$16,%ebx
131 cmp \$1,%bl # see if cache is shared 149 cmp \$1,%bl # see if cache is shared
132 ja .Ldone 150 ja .Lgeneric
133 and \$0xefffffff,%edx # ~(1<<28) 151 and \$0xefffffff,%edx # ~(1<<28)
152.Lgeneric:
153 and \$0x00000800,%r9d # isolate AMD XOP flag
154 and \$0xfffff7ff,%ecx
155 or %ecx,%r9d # merge AMD XOP flag
156
157 mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx
158 bt \$27,%r9d # check OSXSAVE bit
159 jnc .Lclear_avx
160 xor %ecx,%ecx # XCR0
161 .byte 0x0f,0x01,0xd0 # xgetbv
162 and \$6,%eax # isolate XMM and YMM state support
163 cmp \$6,%eax
164 je .Ldone
165.Lclear_avx:
166 mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11)
167 and %eax,%r9d # clear AVX, FMA and AMD XOP bits
134.Ldone: 168.Ldone:
135 shl \$32,%rcx 169 shl \$32,%r9
136 mov %edx,%eax 170 mov %r10d,%eax
137 mov %r8,%rbx 171 mov %r8,%rbx # restore %rbx
138 or %rcx,%rax 172 or %r9,%rax
139 ret 173 ret
140.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid 174.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
141 175
@@ -230,4 +264,21 @@ OPENSSL_wipe_cpu:
230.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu 264.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
231___ 265___
232 266
267print<<___;
268.globl OPENSSL_ia32_rdrand
269.type OPENSSL_ia32_rdrand,\@abi-omnipotent
270.align 16
271OPENSSL_ia32_rdrand:
272 mov \$8,%ecx
273.Loop_rdrand:
274 rdrand %rax
275 jc .Lbreak_rdrand
276 loop .Loop_rdrand
277.Lbreak_rdrand:
278 cmp \$0,%rax
279 cmove %rcx,%rax
280 ret
281.size OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand
282___
283
233close STDOUT; # flush 284close STDOUT; # flush