diff options
Diffstat (limited to 'src/lib/libcrypto/sparccpuid.S')
-rw-r--r-- | src/lib/libcrypto/sparccpuid.S | 302 |
1 files changed, 0 insertions, 302 deletions
diff --git a/src/lib/libcrypto/sparccpuid.S b/src/lib/libcrypto/sparccpuid.S deleted file mode 100644 index b913e3dddb..0000000000 --- a/src/lib/libcrypto/sparccpuid.S +++ /dev/null | |||
@@ -1,302 +0,0 @@ | |||
1 | #if defined(__SUNPRO_C) && defined(__sparcv9) | ||
2 | # define ABI64 /* They've said -xarch=v9 at command line */ | ||
3 | #elif defined(__GNUC__) && defined(__arch64__) | ||
4 | # define ABI64 /* They've said -m64 at command line */ | ||
5 | #endif | ||
6 | |||
! Per-ABI constants used by the routines in this file:
!   FRAME  displacement applied by every "save %sp,FRAME,%sp"
!   BIAS   offset added to %sp/%fp wherever a stack address is formed
!          (OPENSSL_wipe_cpu and _sparcv9_vis1_probe)
! The 64-bit build additionally declares %g2 and %g3 as scratch registers.
! NOTE(review): 2047 looks like the stack bias of the 64-bit SPARC V9 ABI
! and -192/-96 like the usual minimal frames; confirm against the ABI.
7 | #ifdef ABI64 | ||
8 | .register %g2,#scratch | ||
9 | .register %g3,#scratch | ||
10 | # define FRAME -192 | ||
11 | # define BIAS 2047 | ||
12 | #else | ||
13 | # define FRAME -96 | ||
14 | # define BIAS 0 | ||
15 | #endif | ||
16 | |||
17 | .text | ||
18 | .align 32 | ||
19 | .global OPENSSL_wipe_cpu | ||
20 | .type OPENSSL_wipe_cpu,#function | ||
! OPENSSL_wipe_cpu: zero the register state of the CPU.
! Takes no arguments. Cleans the other register windows, zeroes the
! floating-point registers and clears the integer registers of its own
! window plus %g1-%g5. The value left in %i0 (seen by the caller as %o0
! after restore) is %fp+BIAS.
21 | ! Keep in mind that this does not excuse us from wiping the stack! | ||
22 | ! This routine wipes registers, but not the backing store [which | ||
23 | ! resides on the stack, toward lower addresses]. To facilitate for | ||
24 | ! stack wiping I return pointer to the top of stack of the *caller*. | ||
25 | OPENSSL_wipe_cpu: | ||
26 | save %sp,FRAME,%sp | ||
27 | nop | ||
28 | #ifdef __sun | ||
29 | #include <sys/trap.h> | ||
! __sun builds: software trap ST_CLEAN_WINDOWS (taken from sys/trap.h);
! judging by its name it has the system clean the register windows.
30 | ta ST_CLEAN_WINDOWS | ||
31 | #else | ||
! Other builds: clean the windows by hand with the recursive walker
! defined further down in this file.
32 | call .walk.reg.wins | ||
33 | #endif | ||
! Delay slot of the call above (a plain nop after the trap on __sun).
34 | nop | ||
! Position-independent address of .zero: the delay slot loads the distance
! from the call instruction (.-4) to .zero into %o0, and .PIC.zero.up adds
! %o7, which call has set to its own address.
35 | call .PIC.zero.up | ||
36 | mov .zero-(.-4),%o0 | ||
! Fetch a zero word into %f0 and %f1; every FP move below copies from them.
37 | ld [%o0],%f0 | ||
38 | ld [%o0],%f1 | ||
39 | |||
! Compute 0-1 so the condition codes hold a known pattern for the probe.
40 | subcc %g0,1,%o0 | ||
41 | ! Following is V9 "rd %ccr,%o0" instruction. However! V8 | ||
42 | ! specification says that it ("rd %asr2,%o0" in V8 terms) does | ||
43 | ! not cause illegal_instruction trap. It therefore can be used | ||
44 | ! to determine if the CPU the code is executing on is V8- or | ||
45 | ! V9-compliant, as V9 returns a distinct value of 0x99, | ||
46 | ! "negative" and "borrow" bits set in both %icc and %xcc. | ||
47 | .word 0x91408000 !rd %ccr,%o0 | ||
48 | cmp %o0,0x99 | ||
! Anything other than 0x99: treat the CPU as V8 and skip the upper FP bank.
49 | bne .v8 | ||
50 | nop | ||
51 | ! Even though we do not use %fp register bank, | ||
52 | ! we wipe it as memcpy might have used it... | ||
! V9 only: copy the zero in %f0 into %f62 down to %f32 with double moves.
! The instructions are spelled as raw words, presumably so that the file
! still assembles when the assembler targets V8.
53 | .word 0xbfa00040 !fmovd %f0,%f62 | ||
54 | .word 0xbba00040 !... | ||
55 | .word 0xb7a00040 | ||
56 | .word 0xb3a00040 | ||
57 | .word 0xafa00040 | ||
58 | .word 0xaba00040 | ||
59 | .word 0xa7a00040 | ||
60 | .word 0xa3a00040 | ||
61 | .word 0x9fa00040 | ||
62 | .word 0x9ba00040 | ||
63 | .word 0x97a00040 | ||
64 | .word 0x93a00040 | ||
65 | .word 0x8fa00040 | ||
66 | .word 0x8ba00040 | ||
67 | .word 0x87a00040 | ||
68 | .word 0x83a00040 !fmovd %f0,%f32 | ||
! Common tail: zero %f31 down to %f2 with single moves, interleaved with
! clearing %o0-%o5, %o7, %l0-%l7, %i0-%i5 and %g1-%g5. %o6 (%sp), %i6 (%fp)
! and %i7 are not cleared; the code after the loop still reads %fp and
! returns through ret/restore.
69 | .v8: fmovs %f1,%f31 | ||
70 | clr %o0 | ||
71 | fmovs %f0,%f30 | ||
72 | clr %o1 | ||
73 | fmovs %f1,%f29 | ||
74 | clr %o2 | ||
75 | fmovs %f0,%f28 | ||
76 | clr %o3 | ||
77 | fmovs %f1,%f27 | ||
78 | clr %o4 | ||
79 | fmovs %f0,%f26 | ||
80 | clr %o5 | ||
81 | fmovs %f1,%f25 | ||
82 | clr %o7 | ||
83 | fmovs %f0,%f24 | ||
84 | clr %l0 | ||
85 | fmovs %f1,%f23 | ||
86 | clr %l1 | ||
87 | fmovs %f0,%f22 | ||
88 | clr %l2 | ||
89 | fmovs %f1,%f21 | ||
90 | clr %l3 | ||
91 | fmovs %f0,%f20 | ||
92 | clr %l4 | ||
93 | fmovs %f1,%f19 | ||
94 | clr %l5 | ||
95 | fmovs %f0,%f18 | ||
96 | clr %l6 | ||
97 | fmovs %f1,%f17 | ||
98 | clr %l7 | ||
99 | fmovs %f0,%f16 | ||
100 | clr %i0 | ||
101 | fmovs %f1,%f15 | ||
102 | clr %i1 | ||
103 | fmovs %f0,%f14 | ||
104 | clr %i2 | ||
105 | fmovs %f1,%f13 | ||
106 | clr %i3 | ||
107 | fmovs %f0,%f12 | ||
108 | clr %i4 | ||
109 | fmovs %f1,%f11 | ||
110 | clr %i5 | ||
111 | fmovs %f0,%f10 | ||
112 | clr %g1 | ||
113 | fmovs %f1,%f9 | ||
114 | clr %g2 | ||
115 | fmovs %f0,%f8 | ||
116 | clr %g3 | ||
117 | fmovs %f1,%f7 | ||
118 | clr %g4 | ||
119 | fmovs %f0,%f6 | ||
120 | clr %g5 | ||
121 | fmovs %f1,%f5 | ||
122 | fmovs %f0,%f4 | ||
123 | fmovs %f1,%f3 | ||
124 | fmovs %f0,%f2 | ||
125 | |||
126 | add %fp,BIAS,%i0 ! return pointer to top of stack of the caller | ||
127 | |||
! restore sits in the delay slot of ret and switches back to the register
! window of the caller.
128 | ret | ||
129 | restore | ||
130 | |||
131 | .zero: .long 0x0,0x0 | ||
! .zero above holds two zero words; OPENSSL_wipe_cpu loads from it to
! obtain floating-point zeroes.
! .PIC.zero.up: leaf helper that turns the offset passed in %o0 into an
! address by adding %o7, the address of the call instruction that
! invoked it. Result is returned in %o0.
132 | .PIC.zero.up: | ||
133 | retl | ||
! Delay slot of retl: the addition completes before control is back
! in the caller.
134 | add %o0,%o7,%o0 | ||
135 | #ifdef DEBUG | ||
! With DEBUG defined the walker is also exported under a global name.
136 | .global walk_reg_wins | ||
137 | .type walk_reg_wins,#function | ||
138 | walk_reg_wins: | ||
139 | #endif | ||
! .walk.reg.wins: recursive register-window cleaner used by
! OPENSSL_wipe_cpu on non-__sun builds. Every invocation takes a fresh
! window with save, recurses unless one of the two stop conditions below
! holds, and on the way back clears %o2-%o5, %o7 and %l0-%l7 of its own
! window. %i0 is set to one more than the value the nested call left in
! %o0, i.e. a depth counter.
140 | .walk.reg.wins: | ||
141 | save %sp,FRAME,%sp | ||
! Stop condition 1: %o7 of the new window already equals our own return
! address in %i7.
! NOTE(review): presumably this means the walk has come full circle and
! reached a window it marked earlier -- confirm.
142 | cmp %i7,%o7 | ||
143 | be 2f | ||
! Delay slot (the branch is not annulled, so this always executes):
! start the depth count at zero.
144 | clr %o0 | ||
! Stop condition 2: a zero %o7 is taken as the end of the used windows.
145 | cmp %o7,0 ! compiler never cleans %o7... | ||
146 | be 1f ! could have been a leaf function... | ||
147 | clr %o1 | ||
! Otherwise descend one more window; nop fills the delay slot.
148 | call .walk.reg.wins | ||
149 | nop | ||
! Unwinding: wipe the remaining out registers and all locals of this
! window. %o0 is kept because it carries the counter, and %o6 is the
! stack pointer.
150 | 1: clr %o2 | ||
151 | clr %o3 | ||
152 | clr %o4 | ||
153 | clr %o5 | ||
154 | clr %o7 | ||
155 | clr %l0 | ||
156 | clr %l1 | ||
157 | clr %l2 | ||
158 | clr %l3 | ||
159 | clr %l4 | ||
160 | clr %l5 | ||
161 | clr %l6 | ||
162 | clr %l7 | ||
163 | add %o0,1,%i0 ! used for debugging | ||
! Return; restore in the delay slot releases this window.
164 | 2: ret | ||
165 | restore | ||
! The size of OPENSSL_wipe_cpu is closed only here, so that symbol also
! spans .zero, .PIC.zero.up and the walker above.
166 | .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu | ||
167 | |||
168 | .global OPENSSL_atomic_add | ||
169 | .type OPENSSL_atomic_add,#function | ||
170 | .align 32 | ||
! OPENSSL_atomic_add: add %o1 to the 32-bit word at [%o0] and return the
! new value, sign-extended, in %o0.
! The 64-bit build uses only the cas loop at the bottom. The 32-bit build
! first probes for V9 at run time (same %ccr trick as OPENSSL_wipe_cpu)
! and falls back to a swap-based spin lock on V8.
! NOTE(review): presumably int OPENSSL_atomic_add(int *val, int amount);
! the prototype is not visible in this file.
171 | OPENSSL_atomic_add: | ||
172 | #ifndef ABI64 | ||
173 | subcc %g0,1,%o2 | ||
174 | .word 0x95408000 !rd %ccr,%o2, see comment above | ||
175 | cmp %o2,0x99 | ||
176 | be .v9 | ||
177 | nop | ||
! V8 path: take a register window (arguments are now %i0/%i1) and jump
! into the lock loop past the yield call.
178 | save %sp,FRAME,%sp | ||
179 | ba .enter | ||
180 | nop | ||
181 | #ifdef __sun | ||
182 | ! Note that you do not have to link with libthread to call thr_yield, | ||
183 | ! as libc provides a stub, which is overloaded the moment you link | ||
184 | ! with *either* libpthread or libthread... | ||
185 | #define YIELD_CPU thr_yield | ||
186 | #else | ||
187 | ! applies at least to Linux and FreeBSD... Feedback expected... | ||
188 | #define YIELD_CPU sched_yield | ||
189 | #endif | ||
! Contended: give up the CPU, then retry from .enter.
190 | .spin: call YIELD_CPU | ||
191 | nop | ||
! Peek at the word first and back off if it looks busy.
! NOTE(review): this pre-check compares against -4096, while the marker
! actually swapped in below and tested afterwards is -1 -- confirm which
! value is intended.
192 | .enter: ld [%i0],%i2 | ||
193 | cmp %i2,-4096 | ||
194 | be .spin | ||
! Delay slot: prepare the busy marker for the swap.
195 | mov -1,%i2 | ||
! Exchange the marker with the word in memory; getting -1 back means
! another thread holds it, so spin.
196 | swap [%i0],%i2 | ||
197 | cmp %i2,-1 | ||
198 | be .spin | ||
! Delay slot: old value + amount. Also executed when the branch is taken;
! harmless, because %i2 is reloaded at .enter.
199 | add %i2,%i1,%i2 | ||
! Order earlier stores before publishing the new value, which also
! overwrites the busy marker.
200 | stbar | ||
201 | st %i2,[%i0] | ||
! Shift by %g0 (zero): sign-extending copy of the result into %i0.
202 | sra %i2,%g0,%i0 | ||
203 | ret | ||
204 | restore | ||
205 | .v9: | ||
206 | #endif | ||
! V9 path, leaf: compare-and-swap loop. %o2 is the expected old value,
! %o3 the proposed new value; after cas %o3 holds what memory contained.
207 | ld [%o0],%o2 | ||
208 | 1: add %o1,%o2,%o3 | ||
209 | .word 0xd7e2100a !cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3 | ||
! If memory did not hold the expected value, retry with the value just
! fetched (copied to %o2 in the delay slot).
210 | cmp %o2,%o3 | ||
211 | bne 1b | ||
212 | mov %o3,%o2 ! cas is always fetching to dest. register | ||
213 | add %o1,%o2,%o0 ! OpenSSL expects the new value | ||
214 | retl | ||
215 | sra %o0,%g0,%o0 ! we return signed int, remember? | ||
216 | .size OPENSSL_atomic_add,.-OPENSSL_atomic_add | ||
217 | |||
218 | .global _sparcv9_vis1_probe | ||
219 | .align 8 | ||
! _sparcv9_vis1_probe: leaf routine that executes two VIS1 instructions
! (a 16-bit FP load through an alternate space and fxor) and returns.
! It sets no return value.
! NOTE(review): presumably the caller detects missing VIS1 support by
! catching the resulting trap -- that logic is not in this file.
220 | _sparcv9_vis1_probe: | ||
! Form a readable address on the stack of the caller, offset by 2 bytes.
221 | add %sp,BIAS+2,%o1 | ||
222 | .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0 | ||
! The fxor sits in the delay slot of retl.
223 | retl | ||
224 | .word 0x81b00d80 !fxor %f0,%f0,%f0 | ||
225 | .type _sparcv9_vis1_probe,#function | ||
226 | .size _sparcv9_vis1_probe,.-_sparcv9_vis1_probe | ||
227 | |||
228 | ! Probe and instrument VIS1 instruction. Output is number of cycles it | ||
229 | ! takes to execute rdtick and pair of VIS1 instructions. US-Tx VIS unit | ||
230 | ! is slow (documented to be 6 cycles on T2) and the core is in-order | ||
231 | ! single-issue, it should be possible to distinguish Tx reliably... | ||
232 | ! Observed return values are: | ||
233 | ! | ||
234 | ! UltraSPARC IIe 7 | ||
235 | ! UltraSPARC III 7 | ||
236 | ! UltraSPARC T1 24 | ||
237 | ! | ||
238 | ! Numbers for T2 and SPARC64 V-VII are more than welcome. | ||
239 | ! | ||
240 | ! It would be possible to detect specifically US-T1 by instrumenting | ||
241 | ! fmul8ulx16, which is emulated on T1 and as such accounts for quite | ||
242 | ! a lot of %tick-s, couple of thousand on Linux... | ||
243 | .global _sparcv9_vis1_instrument | ||
244 | .align 8 | ||
! Leaf routine, no arguments. Returns in %o0 the smallest of four
! consecutive %tick deltas, each spanning one pair of fxor instructions.
245 | _sparcv9_vis1_instrument: | ||
! Five %tick samples (%o0..%o4) with a pair of fxor between neighbours.
246 | .word 0x91410000 !rd %tick,%o0 | ||
247 | .word 0x81b00d80 !fxor %f0,%f0,%f0 | ||
248 | .word 0x85b08d82 !fxor %f2,%f2,%f2 | ||
249 | .word 0x93410000 !rd %tick,%o1 | ||
250 | .word 0x81b00d80 !fxor %f0,%f0,%f0 | ||
251 | .word 0x85b08d82 !fxor %f2,%f2,%f2 | ||
252 | .word 0x95410000 !rd %tick,%o2 | ||
253 | .word 0x81b00d80 !fxor %f0,%f0,%f0 | ||
254 | .word 0x85b08d82 !fxor %f2,%f2,%f2 | ||
255 | .word 0x97410000 !rd %tick,%o3 | ||
256 | .word 0x81b00d80 !fxor %f0,%f0,%f0 | ||
257 | .word 0x85b08d82 !fxor %f2,%f2,%f2 | ||
258 | .word 0x99410000 !rd %tick,%o4 | ||
259 | |||
260 | ! calculate intervals | ||
261 | sub %o1,%o0,%o0 | ||
262 | sub %o2,%o1,%o1 | ||
263 | sub %o3,%o2,%o2 | ||
264 | sub %o4,%o3,%o3 | ||
265 | |||
266 | ! find minimum value | ||
! Each bgu,a below is an annulled branch to .+8: the mov in its delay
! slot runs only when the branch is taken, that is when %o0 is unsigned
! greater than the other interval. %o0 therefore ends up as the minimum.
267 | cmp %o0,%o1 | ||
268 | .word 0x38680002 !bgu,a %xcc,.+8 | ||
269 | mov %o1,%o0 | ||
270 | cmp %o0,%o2 | ||
271 | .word 0x38680002 !bgu,a %xcc,.+8 | ||
272 | mov %o2,%o0 | ||
273 | cmp %o0,%o3 | ||
274 | .word 0x38680002 !bgu,a %xcc,.+8 | ||
275 | mov %o3,%o0 | ||
276 | |||
277 | retl | ||
278 | nop | ||
279 | .type _sparcv9_vis1_instrument,#function | ||
280 | .size _sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument | ||
281 | |||
282 | .global _sparcv9_vis2_probe | ||
283 | .align 8 | ||
! _sparcv9_vis2_probe: leaf routine whose only work is a single VIS2
! bshuffle, executed in the delay slot of retl. It sets no return value.
! NOTE(review): presumably the caller detects missing VIS2 support by
! catching the resulting trap -- that logic is not in this file.
284 | _sparcv9_vis2_probe: | ||
285 | retl | ||
286 | .word 0x81b00980 !bshuffle %f0,%f0,%f0 | ||
287 | .type _sparcv9_vis2_probe,#function | ||
288 | .size _sparcv9_vis2_probe,.-_sparcv9_vis2_probe | ||
289 | |||
290 | .global _sparcv9_fmadd_probe | ||
291 | .align 8 | ||
! _sparcv9_fmadd_probe: leaf routine that zeroes %f0 and %f2 with fxor
! and then issues one fused multiply-add (fmaddd) in the delay slot of
! retl. It sets no return value.
! NOTE(review): presumably the caller detects missing FMA support by
! catching the resulting trap -- that logic is not in this file.
292 | _sparcv9_fmadd_probe: | ||
293 | .word 0x81b00d80 !fxor %f0,%f0,%f0 | ||
294 | .word 0x85b08d82 !fxor %f2,%f2,%f2 | ||
295 | retl | ||
296 | .word 0x81b80440 !fmaddd %f0,%f0,%f2,%f0 | ||
297 | .type _sparcv9_fmadd_probe,#function | ||
298 | .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe | ||
299 | |||
300 | .section ".init",#alloc,#execinstr | ||
! Fragment placed in the .init section: calls OPENSSL_cpuid_setup, which
! is defined outside this file; nop fills the delay slot of the call.
! NOTE(review): presumably this makes the setup run when the object is
! loaded, before main -- confirm against the platform startup code.
301 | call OPENSSL_cpuid_setup | ||
302 | nop | ||