diff options
Diffstat (limited to 'src/lib/libcrypto/sparccpuid.S')
-rw-r--r-- | src/lib/libcrypto/sparccpuid.S | 402 |
1 files changed, 402 insertions, 0 deletions
diff --git a/src/lib/libcrypto/sparccpuid.S b/src/lib/libcrypto/sparccpuid.S new file mode 100644 index 0000000000..ae61f7f5ce --- /dev/null +++ b/src/lib/libcrypto/sparccpuid.S | |||
@@ -0,0 +1,402 @@ | |||
/*
 * Build-time ABI selection.  ABI64 is defined when the compiler reports a
 * 64-bit SPARC target: Sun C with __sparcv9, or GCC with __arch64__.
 */
1 | #if defined(__SUNPRO_C) && defined(__sparcv9) | ||
2 | # define ABI64 /* They've said -xarch=v9 at command line */ | ||
3 | #elif defined(__GNUC__) && defined(__arch64__) | ||
4 | # define ABI64 /* They've said -m64 at command line */ | ||
5 | #endif | ||
6 | |||
/*
 * FRAME is the stack adjustment passed to the "save" instructions below.
 * BIAS is the offset added to %sp/%fp where they are turned into pointers
 * (2047 in the 64-bit build, 0 otherwise).
 * The 64-bit build also declares %g2 and %g3 as scratch registers.
 */
7 | #ifdef ABI64 | ||
8 | .register %g2,#scratch | ||
9 | .register %g3,#scratch | ||
10 | # define FRAME -192 | ||
11 | # define BIAS 2047 | ||
12 | #else | ||
13 | # define FRAME -96 | ||
14 | # define BIAS 0 | ||
15 | #endif | ||
16 | |||
17 | .text | ||
18 | .align 32 | ||
/*
 * OPENSSL_wipe_cpu
 *
 * Zeroes the CPU registers that may still hold sensitive data and returns
 * a pointer the caller can use as the starting point for wiping its stack.
 *
 * In:     nothing
 * Out:    %fp + BIAS of this routine's frame, written to %i0 just before
 *         the final restore
 * Zeroed: %f0-%f31 always, plus the sixteen hand-encoded fmovd targets
 *         (%f32-%f62 according to the inline annotations) when the V9
 *         check succeeds; %o0-%o5, %o7, %l0-%l7, %i0-%i5 and %g1-%g5.
 *
 * Other register windows are handled by the ST_CLEAN_WINDOWS trap on
 * __sun builds and by .walk.reg.wins on every other build.
 */
19 | .global OPENSSL_wipe_cpu | ||
20 | .type OPENSSL_wipe_cpu,#function | ||
21 | ! Keep in mind that this does not excuse us from wiping the stack! | ||
22 | ! This routine wipes registers, but not the backing store [which | ||
23 | ! resides on the stack, toward lower addresses]. To facilitate for | ||
24 | ! stack wiping I return pointer to the top of stack of the *caller*. | ||
25 | OPENSSL_wipe_cpu: | ||
26 | save %sp,FRAME,%sp | ||
27 | nop | ||
/* Take care of the other register windows first. */
28 | #ifdef __sun | ||
29 | #include <sys/trap.h> | ||
30 | ta ST_CLEAN_WINDOWS | ||
31 | #else | ||
32 | call .walk.reg.wins | ||
33 | #endif | ||
34 | nop | ||
/*
 * Position-independent address of .zero: the delay slot loads the distance
 * from the call instruction to .zero, and .PIC.zero.up adds %o7 to it.
 */
35 | call .PIC.zero.up | ||
36 | mov .zero-(.-4),%o0 | ||
/* %f0 and %f1 are loaded with zero and feed every FP wipe below. */
37 | ld [%o0],%f0 | ||
38 | ld [%o0],%f1 | ||
39 | |||
/* 0 - 1 sets the condition codes that the %ccr probe below reads back. */
40 | subcc %g0,1,%o0 | ||
41 | ! Following is V9 "rd %ccr,%o0" instruction. However! V8 | ||
42 | ! specification says that it ("rd %asr2,%o0" in V8 terms) does | ||
43 | ! not cause illegal_instruction trap. It therefore can be used | ||
44 | ! to determine if the CPU the code is executing on is V8- or | ||
45 | ! V9-compliant, as V9 returns a distinct value of 0x99, | ||
46 | ! "negative" and "borrow" bits set in both %icc and %xcc. | ||
47 | .word 0x91408000 !rd %ccr,%o0 | ||
48 | cmp %o0,0x99 | ||
/* Anything other than 0x99 skips the V9-only wipe of the upper FP bank. */
49 | bne .v8 | ||
50 | nop | ||
51 | ! Even though we do not use %fp register bank, | ||
52 | ! we wipe it as memcpy might have used it... | ||
/* Sixteen hand-encoded double-precision moves, emitted as raw words. */
53 | .word 0xbfa00040 !fmovd %f0,%f62 | ||
54 | .word 0xbba00040 !... | ||
55 | .word 0xb7a00040 | ||
56 | .word 0xb3a00040 | ||
57 | .word 0xafa00040 | ||
58 | .word 0xaba00040 | ||
59 | .word 0xa7a00040 | ||
60 | .word 0xa3a00040 | ||
61 | .word 0x9fa00040 | ||
62 | .word 0x9ba00040 | ||
63 | .word 0x97a00040 | ||
64 | .word 0x93a00040 | ||
65 | .word 0x8fa00040 | ||
66 | .word 0x8ba00040 | ||
67 | .word 0x87a00040 | ||
68 | .word 0x83a00040 !fmovd %f0,%f32 | ||
/*
 * Common path: integer clears are interleaved with single-precision moves
 * of zero into %f31 down to %f2.  %o6 (%sp), %i6 (%fp) and %i7 (return
 * address) are not cleared.
 */
69 | .v8: fmovs %f1,%f31 | ||
70 | clr %o0 | ||
71 | fmovs %f0,%f30 | ||
72 | clr %o1 | ||
73 | fmovs %f1,%f29 | ||
74 | clr %o2 | ||
75 | fmovs %f0,%f28 | ||
76 | clr %o3 | ||
77 | fmovs %f1,%f27 | ||
78 | clr %o4 | ||
79 | fmovs %f0,%f26 | ||
80 | clr %o5 | ||
81 | fmovs %f1,%f25 | ||
82 | clr %o7 | ||
83 | fmovs %f0,%f24 | ||
84 | clr %l0 | ||
85 | fmovs %f1,%f23 | ||
86 | clr %l1 | ||
87 | fmovs %f0,%f22 | ||
88 | clr %l2 | ||
89 | fmovs %f1,%f21 | ||
90 | clr %l3 | ||
91 | fmovs %f0,%f20 | ||
92 | clr %l4 | ||
93 | fmovs %f1,%f19 | ||
94 | clr %l5 | ||
95 | fmovs %f0,%f18 | ||
96 | clr %l6 | ||
97 | fmovs %f1,%f17 | ||
98 | clr %l7 | ||
99 | fmovs %f0,%f16 | ||
100 | clr %i0 | ||
101 | fmovs %f1,%f15 | ||
102 | clr %i1 | ||
103 | fmovs %f0,%f14 | ||
104 | clr %i2 | ||
105 | fmovs %f1,%f13 | ||
106 | clr %i3 | ||
107 | fmovs %f0,%f12 | ||
108 | clr %i4 | ||
109 | fmovs %f1,%f11 | ||
110 | clr %i5 | ||
111 | fmovs %f0,%f10 | ||
112 | clr %g1 | ||
113 | fmovs %f1,%f9 | ||
114 | clr %g2 | ||
115 | fmovs %f0,%f8 | ||
116 | clr %g3 | ||
117 | fmovs %f1,%f7 | ||
118 | clr %g4 | ||
119 | fmovs %f0,%f6 | ||
120 | clr %g5 | ||
121 | fmovs %f1,%f5 | ||
122 | fmovs %f0,%f4 | ||
123 | fmovs %f1,%f3 | ||
124 | fmovs %f0,%f2 | ||
125 | |||
/* Return value: this frame's %fp plus BIAS. */
126 | add %fp,BIAS,%i0 ! return pointer to callerīs top of stack | ||
127 | |||
/* The restore sits in the delay slot of ret. */
128 | ret | ||
129 | restore | ||
130 | |||
/* Eight bytes of zero; OPENSSL_wipe_cpu loads %f0 and %f1 from here. */
131 | .zero: .long 0x0,0x0 | ||
/*
 * .PIC.zero.up - position-independent address helper.
 *
 * In:  %o0 = offset of the target relative to the "call" that got here
 *      %o7 = address of that call instruction
 * Out: %o0 = %o0 + %o7 (computed in the delay slot of retl)
 */
132 | .PIC.zero.up: | ||
133 | retl | ||
134 | add %o0,%o7,%o0 | ||
/*
 * .walk.reg.wins (also exported as walk_reg_wins when DEBUG is defined)
 *
 * Recursive helper called by OPENSSL_wipe_cpu on non-__sun builds.  Each
 * invocation opens a new register window with "save", may recurse one
 * level deeper, and on the way back clears %o2-%o5, %o7 and %l0-%l7 of
 * its window (%o0 and %o1 are cleared in the two branch delay slots).
 *
 * Out: 1 + the value the deeper call left in %o0, or 1 when no deeper
 *      call was made.  When %i7 equals %o7 on entry the routine returns
 *      at once and leaves %i0 untouched.
 */
135 | #ifdef DEBUG | ||
136 | .global walk_reg_wins | ||
137 | .type walk_reg_wins,#function | ||
138 | walk_reg_wins: | ||
139 | #endif | ||
140 | .walk.reg.wins: | ||
141 | save %sp,FRAME,%sp | ||
/*
 * NOTE(review): the two tests below presumably detect a window this walk
 * has already visited (%o7 equal to our own return address) or one that
 * was never used (%o7 still zero) - confirm.
 */
142 | cmp %i7,%o7 | ||
143 | be 2f | ||
144 | clr %o0 | ||
145 | cmp %o7,0 ! compiler never cleans %o7... | ||
146 | be 1f ! could have been a leaf function... | ||
147 | clr %o1 | ||
/* Go one window deeper; its result comes back in %o0. */
148 | call .walk.reg.wins | ||
149 | nop | ||
150 | 1: clr %o2 | ||
151 | clr %o3 | ||
152 | clr %o4 | ||
153 | clr %o5 | ||
154 | clr %o7 | ||
155 | clr %l0 | ||
156 | clr %l1 | ||
157 | clr %l2 | ||
158 | clr %l3 | ||
159 | clr %l4 | ||
160 | clr %l5 | ||
161 | clr %l6 | ||
162 | clr %l7 | ||
163 | add %o0,1,%i0 ! used for debugging | ||
164 | 2: ret | ||
165 | restore | ||
/* The size of OPENSSL_wipe_cpu is taken here, so it spans the helpers too. */
166 | .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu | ||
167 | |||
/*
 * OPENSSL_atomic_add
 *
 * Atomically adds an amount to a 32-bit word and returns the new value.
 *
 * In:  %o0 = address of the 32-bit word
 *      %o1 = amount to add
 * Out: %o0 = new value of the word, returned as a signed int
 *
 * 64-bit builds always use the compare-and-swap loop at the end.
 * 32-bit builds probe %ccr at run time: a V9 CPU jumps to the same loop,
 * anything else falls back to a spin lock built on "swap".
 *
 * Spin lock protocol: the value -1 stored in the word means "held".  A
 * thread takes the lock by swapping -1 in, and releases it by storing the
 * updated value.  The test before the swap must use that same marker;
 * testing for any other value would let a counter that legitimately holds
 * that value spin forever.  A counter whose real value is -1 cannot be
 * told apart from a held lock.
 */
168 | .global OPENSSL_atomic_add | ||
169 | .type OPENSSL_atomic_add,#function | ||
170 | .align 32 | ||
171 | OPENSSL_atomic_add: | ||
172 | #ifndef ABI64 | ||
/* Run-time V8/V9 detection: a V9 CPU reads back 0x99 from %ccr. */
173 | subcc %g0,1,%o2 | ||
174 | .word 0x95408000 !rd %ccr,%o2, see comment above | ||
175 | cmp %o2,0x99 | ||
176 | be .v9 | ||
177 | nop | ||
/* V8 path: new window, so the arguments are now in %i0 and %i1. */
178 | save %sp,FRAME,%sp | ||
179 | ba .enter | ||
180 | nop | ||
181 | #ifdef __sun | ||
182 | ! Note that you do not have to link with libthread to call thr_yield, | ||
183 | ! as libc provides a stub, which is overloaded the moment you link | ||
184 | ! with *either* libpthread or libthread... | ||
185 | #define YIELD_CPU thr_yield | ||
186 | #else | ||
187 | ! applies at least to Linux and FreeBSD... Feedback expected... | ||
188 | #define YIELD_CPU sched_yield | ||
189 | #endif | ||
/* Lock is held by somebody else: give up the CPU, then try again. */
190 | .spin: call YIELD_CPU | ||
191 | nop | ||
/* Cheap test before the swap: spin while the word holds the marker. */
192 | .enter: ld [%i0],%i2 | ||
193 | cmp %i2,-1 | ||
194 | be .spin | ||
/* Delay slot, executed on both paths: prepare the marker for the swap. */
195 | mov -1,%i2 | ||
/* Take the lock; the previous contents of the word come back in %i2. */
196 | swap [%i0],%i2 | ||
197 | cmp %i2,-1 | ||
198 | be .spin | ||
/* Delay slot: harmless when spinning, %i2 is reloaded at .enter. */
199 | add %i2,%i1,%i2 | ||
/* Order the stores, then publish the new value, which releases the lock. */
200 | stbar | ||
201 | st %i2,[%i0] | ||
202 | sra %i2,%g0,%i0 | ||
203 | ret | ||
204 | restore | ||
205 | .v9: | ||
206 | #endif | ||
/*
 * V9 path: compare-and-swap loop.  %o2 = expected old value, %o3 = new
 * value going in and the value actually found in memory coming out.
 */
207 | ld [%o0],%o2 | ||
208 | 1: add %o1,%o2,%o3 | ||
209 | .word 0xd7e2100a !cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3 | ||
210 | cmp %o2,%o3 | ||
211 | bne 1b | ||
212 | mov %o3,%o2 ! cas is always fetching to dest. register | ||
213 | add %o1,%o2,%o0 ! OpenSSL expects the new value | ||
214 | retl | ||
215 | sra %o0,%g0,%o0 ! we return signed int, remember? | ||
216 | .size OPENSSL_atomic_add,.-OPENSSL_atomic_add | ||
217 | |||
/*
 * _sparcv9_rdtick
 *
 * Reads the %tick register when the CPU passes the V9 %ccr check.
 *
 * In:  nothing
 * Out: V9 CPU: %o0 = value read from %tick, %o1 = %o0 shifted right by 32
 *      other:  %o0 = 0, %o1 = 0
 */
218 | .global _sparcv9_rdtick | ||
219 | .align 32 | ||
220 | _sparcv9_rdtick: | ||
/* Same V8/V9 probe as above: a V9 CPU reads back 0x99 from %ccr. */
221 | subcc %g0,1,%o0 | ||
222 | .word 0x91408000 !rd %ccr,%o0 | ||
223 | cmp %o0,0x99 | ||
224 | bne .notick | ||
/* Delay slot, executed on both paths: zero %o0. */
225 | xor %o0,%o0,%o0 | ||
226 | .word 0x91410000 !rd %tick,%o0 | ||
/* The shift that produces the upper half sits in the delay slot of retl. */
227 | retl | ||
228 | .word 0x93323020 !srlx %o0,32,%o1 | ||
229 | .notick: | ||
230 | retl | ||
231 | xor %o1,%o1,%o1 | ||
232 | .type _sparcv9_rdtick,#function | ||
233 | .size _sparcv9_rdtick,.-_sparcv9_rdtick | ||
234 | |||
/*
 * _sparcv9_vis1_probe
 *
 * Executes two hand-encoded instructions, annotated as fxor and a
 * 16-bit ldda from the stack, and returns.  %o0 is not written.
 * NOTE(review): presumably the caller detects missing VIS1 support by
 * catching the resulting trap/signal - confirm against the caller.
 *
 * Clobbers: %f0, %o1
 */
235 | .global _sparcv9_vis1_probe | ||
236 | .align 8 | ||
237 | _sparcv9_vis1_probe: | ||
238 | .word 0x81b00d80 !fxor %f0,%f0,%f0 | ||
/* Address inside the current stack frame used as the load operand. */
239 | add %sp,BIAS+2,%o1 | ||
/* The load is issued from the delay slot of retl. */
240 | retl | ||
241 | .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0 | ||
242 | .type _sparcv9_vis1_probe,#function | ||
243 | .size _sparcv9_vis1_probe,.-_sparcv9_vis1_probe | ||
244 | |||
245 | ! Probe and instrument VIS1 instruction. Output is number of cycles it | ||
246 | ! takes to execute rdtick and pair of VIS1 instructions. US-Tx VIS unit | ||
247 | ! is slow (documented to be 6 cycles on T2) and the core is in-order | ||
248 | ! single-issue, it should be possible to distinguish Tx reliably... | ||
249 | ! Observed return values are: | ||
250 | ! | ||
251 | ! UltraSPARC IIe 7 | ||
252 | ! UltraSPARC III 7 | ||
253 | ! UltraSPARC T1 24 | ||
254 | ! | ||
255 | ! Numbers for T2 and SPARC64 V-VII are more than welcomed. | ||
256 | ! | ||
257 | ! It would be possible to detect specifically US-T1 by instrumenting | ||
258 | ! fmul8ulx16, which is emulated on T1 and as such accounts for quite | ||
259 | ! a lot of %tick-s, couple of thousand on Linux... | ||
/*
 * In:  nothing
 * Out: %o0 = smallest of the four %tick differences measured below
 * Clobbers: %o1-%o4, %f0, %f2, condition codes
 */
260 | .global _sparcv9_vis1_instrument | ||
261 | .align 8 | ||
262 | _sparcv9_vis1_instrument: | ||
/* Five %tick samples (%o0..%o4), each pair separated by two fxor. */
263 | .word 0x91410000 !rd %tick,%o0 | ||
264 | .word 0x81b00d80 !fxor %f0,%f0,%f0 | ||
265 | .word 0x85b08d82 !fxor %f2,%f2,%f2 | ||
266 | .word 0x93410000 !rd %tick,%o1 | ||
267 | .word 0x81b00d80 !fxor %f0,%f0,%f0 | ||
268 | .word 0x85b08d82 !fxor %f2,%f2,%f2 | ||
269 | .word 0x95410000 !rd %tick,%o2 | ||
270 | .word 0x81b00d80 !fxor %f0,%f0,%f0 | ||
271 | .word 0x85b08d82 !fxor %f2,%f2,%f2 | ||
272 | .word 0x97410000 !rd %tick,%o3 | ||
273 | .word 0x81b00d80 !fxor %f0,%f0,%f0 | ||
274 | .word 0x85b08d82 !fxor %f2,%f2,%f2 | ||
275 | .word 0x99410000 !rd %tick,%o4 | ||
276 | |||
277 | ! calculate intervals | ||
/* The order matters: each sample is consumed before it is overwritten. */
278 | sub %o1,%o0,%o0 | ||
279 | sub %o2,%o1,%o1 | ||
280 | sub %o3,%o2,%o2 | ||
281 | sub %o4,%o3,%o3 | ||
282 | |||
283 | ! find minumum value | ||
/*
 * Each annulled branch targets the instruction after its delay slot, so
 * the mov in the delay slot runs only when %o0 is the larger (unsigned)
 * value.
 */
284 | cmp %o0,%o1 | ||
285 | .word 0x38680002 !bgu,a %xcc,.+8 | ||
286 | mov %o1,%o0 | ||
287 | cmp %o0,%o2 | ||
288 | .word 0x38680002 !bgu,a %xcc,.+8 | ||
289 | mov %o2,%o0 | ||
290 | cmp %o0,%o3 | ||
291 | .word 0x38680002 !bgu,a %xcc,.+8 | ||
292 | mov %o3,%o0 | ||
293 | |||
294 | retl | ||
295 | nop | ||
296 | .type _sparcv9_vis1_instrument,#function | ||
297 | .size _sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument | ||
298 | |||
/*
 * _sparcv9_vis2_probe
 *
 * Executes one hand-encoded instruction, annotated as bshuffle, in the
 * delay slot of retl and returns.  %o0 is not written.
 * NOTE(review): presumably the caller detects missing VIS2 support by
 * catching the resulting trap/signal - confirm against the caller.
 *
 * Clobbers: %f0
 */
299 | .global _sparcv9_vis2_probe | ||
300 | .align 8 | ||
301 | _sparcv9_vis2_probe: | ||
302 | retl | ||
303 | .word 0x81b00980 !bshuffle %f0,%f0,%f0 | ||
304 | .type _sparcv9_vis2_probe,#function | ||
305 | .size _sparcv9_vis2_probe,.-_sparcv9_vis2_probe | ||
306 | |||
/*
 * _sparcv9_fmadd_probe
 *
 * Clears %f0 and %f2 with two hand-encoded fxor instructions, then
 * executes an instruction annotated as fmaddd in the delay slot of retl.
 * %o0 is not written.
 * NOTE(review): presumably the caller detects missing fused multiply-add
 * support by catching the resulting trap/signal - confirm.
 *
 * Clobbers: %f0, %f2
 */
307 | .global _sparcv9_fmadd_probe | ||
308 | .align 8 | ||
309 | _sparcv9_fmadd_probe: | ||
310 | .word 0x81b00d80 !fxor %f0,%f0,%f0 | ||
311 | .word 0x85b08d82 !fxor %f2,%f2,%f2 | ||
312 | retl | ||
313 | .word 0x81b80440 !fmaddd %f0,%f0,%f2,%f0 | ||
314 | .type _sparcv9_fmadd_probe,#function | ||
315 | .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe | ||
316 | |||
/*
 * OPENSSL_cleanse
 *
 * Overwrites a memory area with zero bytes.
 *
 * In:  %o0 = start address
 *      %o1 = number of bytes
 * Out: nothing is returned in a defined register
 * Clobbers: %o0, %o1, %g1 (32-bit builds only), condition codes
 *
 * Lengths of 14 or less are cleared one byte at a time (.Little).
 * Longer areas are first byte-cleared up to an alignment boundary, then
 * cleared with 8-byte stores (64-bit builds and V9 CPUs) or 4-byte
 * stores (other CPUs in 32-bit builds), and the tail goes to .Little.
 */
317 | .global OPENSSL_cleanse | ||
318 | .align 32 | ||
319 | OPENSSL_cleanse: | ||
320 | cmp %o1,14 | ||
321 | nop | ||
322 | #ifdef ABI64 | ||
323 | bgu %xcc,.Lot | ||
324 | #else | ||
325 | bgu .Lot | ||
326 | #endif | ||
/* Delay slot of the branch above; sets up the zero-length test. */
327 | cmp %o1,0 | ||
328 | bne .Little | ||
329 | nop | ||
/* Length 0: nothing to do. */
330 | retl | ||
331 | nop | ||
332 | |||
/* Byte loop: the pointer increment sits in the branch delay slot. */
333 | .Little: | ||
334 | stb %g0,[%o0] | ||
335 | subcc %o1,1,%o1 | ||
336 | bnz .Little | ||
337 | add %o0,1,%o0 | ||
338 | retl | ||
339 | nop | ||
340 | .align 32 | ||
341 | .Lot: | ||
342 | #ifndef ABI64 | ||
/* 32-bit build: pick the store width at run time via the %ccr probe. */
343 | subcc %g0,1,%g1 | ||
344 | ! see above for explanation | ||
345 | .word 0x83408000 !rd %ccr,%g1 | ||
346 | cmp %g1,0x99 | ||
347 | bne .v8lot | ||
348 | nop | ||
349 | #endif | ||
350 | |||
/* Clear single bytes until %o0 is a multiple of 8. */
351 | .v9lot: andcc %o0,7,%g0 | ||
352 | bz .v9aligned | ||
353 | nop | ||
354 | stb %g0,[%o0] | ||
355 | sub %o1,1,%o1 | ||
356 | ba .v9lot | ||
357 | add %o0,1,%o0 | ||
/* Pad to a 16-byte boundary with the fill word 0x01000000. */
358 | .align 16,0x01000000 | ||
/*
 * 8 bytes per iteration while the remaining count is still 8 or more.
 * The encoded branch has a fixed displacement back to the stx, so no
 * instruction may be inserted inside this loop.
 */
359 | .v9aligned: | ||
360 | .word 0xc0720000 !stx %g0,[%o0] | ||
361 | sub %o1,8,%o1 | ||
362 | andcc %o1,-8,%g0 | ||
363 | #ifdef ABI64 | ||
364 | .word 0x126ffffd !bnz %xcc,.v9aligned | ||
365 | #else | ||
366 | .word 0x124ffffd !bnz %icc,.v9aligned | ||
367 | #endif | ||
368 | add %o0,8,%o0 | ||
369 | |||
/* Fewer than 8 bytes remain: finish in the byte loop, or return. */
370 | cmp %o1,0 | ||
371 | bne .Little | ||
372 | nop | ||
373 | retl | ||
374 | nop | ||
375 | #ifndef ABI64 | ||
/* Non-V9 variant: align to 4 bytes, then clear one word per iteration. */
376 | .v8lot: andcc %o0,3,%g0 | ||
377 | bz .v8aligned | ||
378 | nop | ||
379 | stb %g0,[%o0] | ||
380 | sub %o1,1,%o1 | ||
381 | ba .v8lot | ||
382 | add %o0,1,%o0 | ||
/* NOTE(review): this nop is never reached by fall-through; presumably padding. */
383 | nop | ||
384 | .v8aligned: | ||
385 | st %g0,[%o0] | ||
386 | sub %o1,4,%o1 | ||
387 | andcc %o1,-4,%g0 | ||
388 | bnz .v8aligned | ||
389 | add %o0,4,%o0 | ||
390 | |||
/* Fewer than 4 bytes remain: finish in the byte loop, or return. */
391 | cmp %o1,0 | ||
392 | bne .Little | ||
393 | nop | ||
394 | retl | ||
395 | nop | ||
396 | #endif | ||
397 | .type OPENSSL_cleanse,#function | ||
398 | .size OPENSSL_cleanse,.-OPENSSL_cleanse | ||
399 | |||
/*
 * Initialization hook: this call is emitted into the ".init" section
 * rather than ".text".  OPENSSL_cpuid_setup is defined outside this file.
 * NOTE(review): presumably this makes the setup routine run as part of
 * the program's or library's start-up code - confirm for the target
 * platform.
 */
400 | .section ".init",#alloc,#execinstr | ||
401 | call OPENSSL_cpuid_setup | ||
402 | nop | ||