summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/x86_64cpuid.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/x86_64cpuid.pl')
-rw-r--r--src/lib/libcrypto/x86_64cpuid.pl256
1 files changed, 164 insertions, 92 deletions
diff --git a/src/lib/libcrypto/x86_64cpuid.pl b/src/lib/libcrypto/x86_64cpuid.pl
index 8946b464a8..b771a8539d 100644
--- a/src/lib/libcrypto/x86_64cpuid.pl
+++ b/src/lib/libcrypto/x86_64cpuid.pl
@@ -1,110 +1,38 @@
1#!/usr/bin/env perl 1#!/usr/bin/env perl
2 2
3$output=shift; 3$flavour = shift;
4$masm=1 if ($output =~ /\.asm/); 4$output = shift;
5open STDOUT,">$output" || die "can't open $output: $!"; 5if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
6
7print<<___ if(defined($masm));
8_TEXT SEGMENT
9PUBLIC OPENSSL_rdtsc
10
11PUBLIC OPENSSL_atomic_add
12ALIGN 16
13OPENSSL_atomic_add PROC
14 mov eax,DWORD PTR[rcx]
15\$Lspin: lea r8,DWORD PTR[rdx+rax]
16lock cmpxchg DWORD PTR[rcx],r8d
17 jne \$Lspin
18 mov eax,r8d
19 cdqe
20 ret
21OPENSSL_atomic_add ENDP
22
23PUBLIC OPENSSL_wipe_cpu
24ALIGN 16
25OPENSSL_wipe_cpu PROC
26 pxor xmm0,xmm0
27 pxor xmm1,xmm1
28 pxor xmm2,xmm2
29 pxor xmm3,xmm3
30 pxor xmm4,xmm4
31 pxor xmm5,xmm5
32 xor rcx,rcx
33 xor rdx,rdx
34 xor r8,r8
35 xor r9,r9
36 xor r10,r10
37 xor r11,r11
38 lea rax,QWORD PTR[rsp+8]
39 ret
40OPENSSL_wipe_cpu ENDP
41_TEXT ENDS
42 6
43CRT\$XIU SEGMENT 7$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
44EXTRN OPENSSL_cpuid_setup:PROC
45DQ OPENSSL_cpuid_setup
46CRT\$XIU ENDS
47 8
48___ 9$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
49print<<___ if(!defined($masm)); 10open STDOUT,"| $^X ${dir}perlasm/x86_64-xlate.pl $flavour $output";
11
12if ($win64) { $arg1="%rcx"; $arg2="%rdx"; }
13else { $arg1="%rdi"; $arg2="%rsi"; }
14print<<___;
50#include <machine/asm.h> 15#include <machine/asm.h>
16.extern OPENSSL_cpuid_setup
17.section .init
18 call PIC_PLT(OPENSSL_cpuid_setup)
51 19
52.text 20.text
53 21
54.globl OPENSSL_atomic_add 22.globl OPENSSL_atomic_add
55.type OPENSSL_atomic_add,\@function 23.type OPENSSL_atomic_add,\@abi-omnipotent
56.align 16 24.align 16
57OPENSSL_atomic_add: 25OPENSSL_atomic_add:
58 movl (%rdi),%eax 26 movl ($arg1),%eax
59.Lspin: leaq (%rsi,%rax),%r8 27.Lspin: leaq ($arg2,%rax),%r8
60lock; cmpxchgl %r8d,(%rdi) 28 .byte 0xf0 # lock
29 cmpxchgl %r8d,($arg1)
61 jne .Lspin 30 jne .Lspin
62 movl %r8d,%eax 31 movl %r8d,%eax
63 .byte 0x48,0x98 32 .byte 0x48,0x98 # cltq/cdqe
64 ret 33 ret
65.size OPENSSL_atomic_add,.-OPENSSL_atomic_add 34.size OPENSSL_atomic_add,.-OPENSSL_atomic_add
66 35
67.globl OPENSSL_wipe_cpu
68.type OPENSSL_wipe_cpu,\@function
69.align 16
70OPENSSL_wipe_cpu:
71 pxor %xmm0,%xmm0
72 pxor %xmm1,%xmm1
73 pxor %xmm2,%xmm2
74 pxor %xmm3,%xmm3
75 pxor %xmm4,%xmm4
76 pxor %xmm5,%xmm5
77 pxor %xmm6,%xmm6
78 pxor %xmm7,%xmm7
79 pxor %xmm8,%xmm8
80 pxor %xmm9,%xmm9
81 pxor %xmm10,%xmm10
82 pxor %xmm11,%xmm11
83 pxor %xmm12,%xmm12
84 pxor %xmm13,%xmm13
85 pxor %xmm14,%xmm14
86 pxor %xmm15,%xmm15
87 xorq %rcx,%rcx
88 xorq %rdx,%rdx
89 xorq %rsi,%rsi
90 xorq %rdi,%rdi
91 xorq %r8,%r8
92 xorq %r9,%r9
93 xorq %r10,%r10
94 xorq %r11,%r11
95 leaq 8(%rsp),%rax
96 ret
97.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
98
99.section .init
100 call PIC_PLT(OPENSSL_cpuid_setup)
101
102___
103
104open STDOUT,"| $^X perlasm/x86_64-xlate.pl $output";
105print<<___;
106.text
107
108.globl OPENSSL_rdtsc 36.globl OPENSSL_rdtsc
109.type OPENSSL_rdtsc,\@abi-omnipotent 37.type OPENSSL_rdtsc,\@abi-omnipotent
110.align 16 38.align 16
@@ -123,6 +51,8 @@ OPENSSL_ia32_cpuid:
123 51
124 xor %eax,%eax 52 xor %eax,%eax
125 cpuid 53 cpuid
54 mov %eax,%r11d # max value for standard query level
55
126 xor %eax,%eax 56 xor %eax,%eax
127 cmp \$0x756e6547,%ebx # "Genu" 57 cmp \$0x756e6547,%ebx # "Genu"
128 setne %al 58 setne %al
@@ -132,8 +62,54 @@ OPENSSL_ia32_cpuid:
132 or %eax,%r9d 62 or %eax,%r9d
133 cmp \$0x6c65746e,%ecx # "ntel" 63 cmp \$0x6c65746e,%ecx # "ntel"
134 setne %al 64 setne %al
135 or %eax,%r9d 65 or %eax,%r9d # 0 indicates Intel CPU
66 jz .Lintel
67
68 cmp \$0x68747541,%ebx # "Auth"
69 setne %al
70 mov %eax,%r10d
71 cmp \$0x69746E65,%edx # "enti"
72 setne %al
73 or %eax,%r10d
74 cmp \$0x444D4163,%ecx # "cAMD"
75 setne %al
76 or %eax,%r10d # 0 indicates AMD CPU
77 jnz .Lintel
78
79 # AMD specific
80 mov \$0x80000000,%eax
81 cpuid
82 cmp \$0x80000008,%eax
83 jb .Lintel
84
85 mov \$0x80000008,%eax
86 cpuid
87 movzb %cl,%r10 # number of cores - 1
88 inc %r10 # number of cores
89
90 mov \$1,%eax
91 cpuid
92 bt \$28,%edx # test hyper-threading bit
93 jnc .Ldone
94 shr \$16,%ebx # number of logical processors
95 cmp %r10b,%bl
96 ja .Ldone
97 and \$0xefffffff,%edx # ~(1<<28)
98 jmp .Ldone
99
100.Lintel:
101 cmp \$4,%r11d
102 mov \$-1,%r10d
103 jb .Lnocacheinfo
104
105 mov \$4,%eax
106 mov \$0,%ecx # query L1D
107 cpuid
108 mov %eax,%r10d
109 shr \$14,%r10d
110 and \$0xfff,%r10d # number of cores -1 per L1D
136 111
112.Lnocacheinfo:
137 mov \$1,%eax 113 mov \$1,%eax
138 cpuid 114 cpuid
139 cmp \$0,%r9d 115 cmp \$0,%r9d
@@ -146,6 +122,11 @@ OPENSSL_ia32_cpuid:
146.Lnotintel: 122.Lnotintel:
147 bt \$28,%edx # test hyper-threading bit 123 bt \$28,%edx # test hyper-threading bit
148 jnc .Ldone 124 jnc .Ldone
125 and \$0xefffffff,%edx # ~(1<<28)
126 cmp \$0,%r10d
127 je .Ldone
128
129 or \$0x10000000,%edx # 1<<28
149 shr \$16,%ebx 130 shr \$16,%ebx
150 cmp \$1,%bl # see if cache is shared 131 cmp \$1,%bl # see if cache is shared
151 ja .Ldone 132 ja .Ldone
@@ -157,5 +138,96 @@ OPENSSL_ia32_cpuid:
157 or %rcx,%rax 138 or %rcx,%rax
158 ret 139 ret
159.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid 140.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
141
142.globl OPENSSL_cleanse
143.type OPENSSL_cleanse,\@abi-omnipotent
144.align 16
145OPENSSL_cleanse:
146 xor %rax,%rax
147 cmp \$15,$arg2
148 jae .Lot
149 cmp \$0,$arg2
150 je .Lret
151.Little:
152 mov %al,($arg1)
153 sub \$1,$arg2
154 lea 1($arg1),$arg1
155 jnz .Little
156.Lret:
157 ret
158.align 16
159.Lot:
160 test \$7,$arg1
161 jz .Laligned
162 mov %al,($arg1)
163 lea -1($arg2),$arg2
164 lea 1($arg1),$arg1
165 jmp .Lot
166.Laligned:
167 mov %rax,($arg1)
168 lea -8($arg2),$arg2
169 test \$-8,$arg2
170 lea 8($arg1),$arg1
171 jnz .Laligned
172 cmp \$0,$arg2
173 jne .Little
174 ret
175.size OPENSSL_cleanse,.-OPENSSL_cleanse
176___
177
178print<<___ if (!$win64);
179.globl OPENSSL_wipe_cpu
180.type OPENSSL_wipe_cpu,\@abi-omnipotent
181.align 16
182OPENSSL_wipe_cpu:
183 pxor %xmm0,%xmm0
184 pxor %xmm1,%xmm1
185 pxor %xmm2,%xmm2
186 pxor %xmm3,%xmm3
187 pxor %xmm4,%xmm4
188 pxor %xmm5,%xmm5
189 pxor %xmm6,%xmm6
190 pxor %xmm7,%xmm7
191 pxor %xmm8,%xmm8
192 pxor %xmm9,%xmm9
193 pxor %xmm10,%xmm10
194 pxor %xmm11,%xmm11
195 pxor %xmm12,%xmm12
196 pxor %xmm13,%xmm13
197 pxor %xmm14,%xmm14
198 pxor %xmm15,%xmm15
199 xorq %rcx,%rcx
200 xorq %rdx,%rdx
201 xorq %rsi,%rsi
202 xorq %rdi,%rdi
203 xorq %r8,%r8
204 xorq %r9,%r9
205 xorq %r10,%r10
206 xorq %r11,%r11
207 leaq 8(%rsp),%rax
208 ret
209.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
160___ 210___
211print<<___ if ($win64);
212.globl OPENSSL_wipe_cpu
213.type OPENSSL_wipe_cpu,\@abi-omnipotent
214.align 16
215OPENSSL_wipe_cpu:
216 pxor %xmm0,%xmm0
217 pxor %xmm1,%xmm1
218 pxor %xmm2,%xmm2
219 pxor %xmm3,%xmm3
220 pxor %xmm4,%xmm4
221 pxor %xmm5,%xmm5
222 xorq %rcx,%rcx
223 xorq %rdx,%rdx
224 xorq %r8,%r8
225 xorq %r9,%r9
226 xorq %r10,%r10
227 xorq %r11,%r11
228 leaq 8(%rsp),%rax
229 ret
230.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
231___
232
161close STDOUT; # flush 233close STDOUT; # flush