1 #if defined(__SUNPRO_C) && defined(__sparcv9)
2 # define ABI64 /* They've said -xarch=v9 at command line */
3 #elif defined(__GNUC__) && defined(__arch64__)
4 # define ABI64 /* They've said -m64 at command line */
! OPENSSL_wipe_cpu: scrubs CPU registers and returns a pointer to the
! top of stack of the caller, so that the caller can wipe the stack too.
! NOTE(review): only an excerpt of the routine is visible here; the
! comments describe just the instructions that are shown.
19 .global OPENSSL_wipe_cpu
20 .type OPENSSL_wipe_cpu,#function
21 ! Keep in mind that this does not excuse us from wiping the stack!
22 ! This routine wipes registers, but not the backing store [which
23 ! resides on the stack, toward lower addresses]. To facilitate
24 ! stack wiping I return a pointer to the top of stack of the *caller*.
41 ! Following is V9 "rd %ccr,%o0" instruction. However! V8
42 ! specification says that it ("rd %asr2,%o0" in V8 terms) does
43 ! not cause illegal_instruction trap. It therefore can be used
44 ! to determine if the CPU the code is executing on is V8- or
45 ! V9-compliant, as V9 returns a distinct value of 0x99,
46 ! "negative" and "borrow" bits set in both %icc and %xcc.
47 .word 0x91408000 !rd %ccr,%o0
51 ! Even though we do not use %fp register bank,
52 ! we wipe it as memcpy might have used it...
! The two fmovd encodings below copy %f0 into %f62 and %f32; presumably
! they are the ends of a longer run over the upper double registers.
53 .word 0xbfa00040 !fmovd %f0,%f62
68 .word 0x83a00040 !fmovd %f0,%f32
126 add %fp,BIAS,%i0 ! return pointer to the top of stack of the caller
! walk_reg_wins: additional global entry point declared inside the
! extent of OPENSSL_wipe_cpu. NOTE(review): its label and most of its
! body are not visible in this excerpt.
136 .global walk_reg_wins
137 .type walk_reg_wins,#function
145 cmp %o7,0 ! compiler never cleans %o7...
146 be 1f ! could have been a leaf function...
163 add %o0,1,%i0 ! used for debugging
! The size is taken here, after walk_reg_wins, so the extent recorded
! for OPENSSL_wipe_cpu covers both entry points; no separate .size for
! walk_reg_wins is visible.
166 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
! OPENSSL_atomic_add: adds %o1 to the word addressed by %o0 using cas
! and returns the new value in %o0 as a signed int.
! NOTE(review): excerpt only; the code around the V8/V9 check and the
! .spin label is not visible here.
168 .global OPENSSL_atomic_add
169 .type OPENSSL_atomic_add,#function
174 .word 0x95408000 !rd %ccr,%o2, see comment above
182 ! Note that you do not have to link with libthread to call thr_yield,
183 ! as libc provides a stub, which is overloaded the moment you link
184 ! with *either* libpthread or libthread...
185 #define YIELD_CPU thr_yield
187 ! applies at least to Linux and FreeBSD... Feedback expected...
188 #define YIELD_CPU sched_yield
! The two definitions of YIELD_CPU above are presumably alternatives
! selected by a preprocessor conditional that is not visible here.
190 .spin: call YIELD_CPU
209 .word 0xd7e2100a !cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
212 mov %o3,%o2 ! cas is always fetching to dest. register
213 add %o1,%o2,%o0 ! OpenSSL expects the new value
! An arithmetic right shift by zero (%g0) sign-extends the low 32 bits
! of the result on V9.
215 sra %o0,%g0,%o0 ! we return signed int, remember?
216 .size OPENSSL_atomic_add,.-OPENSSL_atomic_add
! _sparcv9_rdtick: reads the %tick register into %o0 and places its
! upper 32 bits in %o1.
! NOTE(review): the label, branches and return are not visible in this
! excerpt; the leading rd %ccr is presumably the V8/V9 check described
! in OPENSSL_wipe_cpu.
218 .global _sparcv9_rdtick
222 .word 0x91408000 !rd %ccr,%o0
226 .word 0x91410000 !rd %tick,%o0
228 .word 0x93323020 !srlx %o0,32,%o1
232 .type _sparcv9_rdtick,#function
233 .size _sparcv9_rdtick,.-_sparcv9_rdtick
! _sparcv9_vis1_probe: executes a short floating-point load through an
! alternate address space from [%o1] and an fxor on %f0.
! NOTE(review): presumably the caller runs this to find out whether
! VIS1 instructions trap -- confirm against the caller. The label and
! return are not visible in this excerpt.
235 .global _sparcv9_vis1_probe
239 .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0
241 .word 0x81b00d80 !fxor %f0,%f0,%f0
242 .type _sparcv9_vis1_probe,#function
243 .size _sparcv9_vis1_probe,.-_sparcv9_vis1_probe
245 ! Probe and instrument VIS1 instruction. Output is number of cycles it
246 ! takes to execute rdtick and pair of VIS1 instructions. US-Tx VIS unit
247 ! is slow (documented to be 6 cycles on T2) and the core is in-order
248 ! single-issue, it should be possible to distinguish Tx reliably...
249 ! Observed return values are:
256 ! (*) result has less to do with VIS instruction latencies than
257 ! with rdtick itself, which appears to be that slow; but it does the
258 ! trick in the sense that FP and VIS code paths are still slower
258 ! than integer-only ones.
260 ! Numbers for T2 and SPARC64 V-VII are more than welcome.
262 ! It would be possible to detect specifically US-T1 by instrumenting
263 ! fmul8ulx16, which is emulated on T1 and as such accounts for quite
264 ! a lot of %tick-s, couple of thousand on Linux...
! _sparcv9_vis1_instrument: takes five %tick samples (%o0..%o4), each
! separated by a pair of fxor instructions, then works on the intervals
! between consecutive samples.
265 .global _sparcv9_vis1_instrument
267 _sparcv9_vis1_instrument:
! Warm-up pair before the first sample.
268 .word 0x81b00d80 !fxor %f0,%f0,%f0
269 .word 0x85b08d82 !fxor %f2,%f2,%f2
270 .word 0x91410000 !rd %tick,%o0
271 .word 0x81b00d80 !fxor %f0,%f0,%f0
272 .word 0x85b08d82 !fxor %f2,%f2,%f2
273 .word 0x93410000 !rd %tick,%o1
274 .word 0x81b00d80 !fxor %f0,%f0,%f0
275 .word 0x85b08d82 !fxor %f2,%f2,%f2
276 .word 0x95410000 !rd %tick,%o2
277 .word 0x81b00d80 !fxor %f0,%f0,%f0
278 .word 0x85b08d82 !fxor %f2,%f2,%f2
279 .word 0x97410000 !rd %tick,%o3
280 .word 0x81b00d80 !fxor %f0,%f0,%f0
281 .word 0x85b08d82 !fxor %f2,%f2,%f2
282 .word 0x99410000 !rd %tick,%o4
284 ! calculate intervals
! NOTE(review): the subtract, compare and move instructions are not
! visible in this excerpt. Each annulled bgu below skips over one
! instruction when taken; presumably the three of them pick the
! smallest interval -- confirm against the full source.
292 .word 0x38680002 !bgu,a %xcc,.+8
295 .word 0x38680002 !bgu,a %xcc,.+8
298 .word 0x38680002 !bgu,a %xcc,.+8
303 .type _sparcv9_vis1_instrument,#function
304 .size _sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
! _sparcv9_vis2_probe: executes a single bshuffle on %f0.
! NOTE(review): presumably used to find out whether VIS2 instructions
! trap -- confirm against the caller. The label and return are not
! visible in this excerpt.
306 .global _sparcv9_vis2_probe
310 .word 0x81b00980 !bshuffle %f0,%f0,%f0
311 .type _sparcv9_vis2_probe,#function
312 .size _sparcv9_vis2_probe,.-_sparcv9_vis2_probe
! _sparcv9_fmadd_probe: clears %f0 and %f2 with fxor, then executes one
! fmaddd on them.
! NOTE(review): presumably used to find out whether fused multiply-add
! traps -- confirm against the caller. The return is not visible in
! this excerpt.
314 .global _sparcv9_fmadd_probe
316 _sparcv9_fmadd_probe:
317 .word 0x81b00d80 !fxor %f0,%f0,%f0
318 .word 0x85b08d82 !fxor %f2,%f2,%f2
320 .word 0x81b80440 !fmaddd %f0,%f0,%f2,%f0
321 .type _sparcv9_fmadd_probe,#function
322 .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
! _sparcv9_rdcfr: reads ancillary state register 26 into %o0.
! NOTE(review): going by the routine name, %asr26 is presumably the
! compatibility feature register -- confirm. The label and return are
! not visible in this excerpt.
324 .global _sparcv9_rdcfr
328 .word 0x91468000 !rd %asr26,%o0
329 .type _sparcv9_rdcfr,#function
330 .size _sparcv9_rdcfr,.-_sparcv9_rdcfr
! _sparcv9_vis3_probe: executes a single xmulx whose operands and
! destination are all %g0, so no register state changes.
! NOTE(review): presumably used to find out whether VIS3 instructions
! trap -- confirm against the caller. The label and return are not
! visible in this excerpt.
332 .global _sparcv9_vis3_probe
336 .word 0x81b022a0 !xmulx %g0,%g0,%g0
337 .type _sparcv9_vis3_probe,#function
338 .size _sparcv9_vis3_probe,.-_sparcv9_vis3_probe
! _sparcv9_random: executes the hand-encoded random instruction with
! %o0 as its destination.
! NOTE(review): the label and return are not visible in this excerpt.
340 .global _sparcv9_random
344 .word 0x91b002a0 !random %o0
345 .type _sparcv9_random,#function
! The size must be measured from the label of this routine. Measuring
! from _sparcv9_vis3_probe made the recorded symbol size also span the
! preceding routine.
346 .size _sparcv9_random,.-_sparcv9_random
! OPENSSL_cleanse: zeroes memory starting at the address in %o0; the
! visible store writes %g0.
! NOTE(review): excerpt only; the length argument, the loops and the
! byte-wise paths are not visible here.
348 .global OPENSSL_cleanse
375 ! see above for explanation
376 .word 0x83408000 !rd %ccr,%g1
! V9 path: test whether the pointer is 8-byte aligned.
382 .v9lot: andcc %o0,7,%g0
391 .word 0xc0720000 !stx %g0,[%o0]
! Two encodings of the same backward branch, one testing %xcc and one
! testing %icc; presumably a preprocessor conditional that is not
! visible here selects between them.
395 .word 0x126ffffd !bnz %xcc,.v9aligned
397 .word 0x124ffffd !bnz %icc,.v9aligned
! V8 path: test whether the pointer is 4-byte aligned.
407 .v8lot: andcc %o0,3,%g0
428 .type OPENSSL_cleanse,#function
429 .size OPENSSL_cleanse,.-OPENSSL_cleanse
! _sparcv9_vis1_instrument_bus: samples %tick around block load/store
! traffic and records tick differences through the output pointer.
!   %o0 = out, advanced by 4 bytes per iteration
!   %o1 = cnt, decremented per iteration (original value kept in %o3)
! NOTE(review): excerpt only; the setup of %g1, the loads feeding the
! cas instructions, the loop branch and the return are not visible.
431 .global _sparcv9_vis1_instrument_bus
433 _sparcv9_vis1_instrument_bus:
434 mov %o1,%o3 ! save cnt
435 .word 0x99410000 !rd %tick,%o4 ! tick
436 mov %o4,%o5 ! lasttick = tick
! Block load and committing block store at [%g1], each followed by a
! full membar.
440 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
441 .word 0x8143e040 !membar #Sync
442 .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
443 .word 0x8143e040 !membar #Sync
446 .word 0xc9e2100c !cas [%o0],%o4,%g4
448 .Loop: .word 0x99410000 !rd %tick,%o4
449 sub %o4,%o5,%g4 ! diff=tick-lasttick
450 mov %o4,%o5 ! lasttick=tick
453 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
454 .word 0x8143e040 !membar #Sync
455 .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
456 .word 0x8143e040 !membar #Sync
459 .word 0xc9e2100c !cas [%o0],%o4,%g4
460 subcc %o1,1,%o1 ! --$cnt
462 add %o0,4,%o0 ! ++$out
466 .type _sparcv9_vis1_instrument_bus,#function
467 .size _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
! _sparcv9_vis1_instrument_bus2: variant of the bus instrumentation
! that advances the output pointer only conditionally.
!   %o0 = out, advanced by 0 or 4 bytes per iteration
!   %o1 = cnt, scaled to bytes (cnt*4); original value kept in %o3
!   %o2 = max, decremented per iteration
! NOTE(review): excerpt only; the setup of %g1, the comparison that
! sets the zero flag, the loop branches and the return are not visible.
469 .global _sparcv9_vis1_instrument_bus2
471 _sparcv9_vis1_instrument_bus2:
472 mov %o1,%o3 ! save cnt
473 sll %o1,2,%o1 ! cnt*=4
475 .word 0x99410000 !rd %tick,%o4 ! tick
476 mov %o4,%o5 ! lasttick = tick
480 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
481 .word 0x8143e040 !membar #Sync
482 .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
483 .word 0x8143e040 !membar #Sync
486 .word 0xc9e2100c !cas [%o0],%o4,%g4
488 .word 0x99410000 !rd %tick,%o4 ! tick
489 sub %o4,%o5,%g4 ! diff=tick-lasttick
490 mov %o4,%o5 ! lasttick=tick
491 mov %g4,%g5 ! lastdiff=diff
494 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
495 .word 0x8143e040 !membar #Sync
496 .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
497 .word 0x8143e040 !membar #Sync
500 .word 0xc9e2100c !cas [%o0],%o4,%g4
502 subcc %o2,1,%o2 ! --max
506 .word 0x99410000 !rd %tick,%o4 ! tick
507 sub %o4,%o5,%g4 ! diff=tick-lasttick
508 mov %o4,%o5 ! lasttick=tick
510 mov %g4,%g5 ! lastdiff=diff
! Turn the zero flag into a step: %g1 becomes 0 when the flag is set
! and 4 when it is clear. The step is then subtracted from the byte
! count and added to the output pointer, without a branch.
512 .word 0x83408000 !rd %ccr,%g1
513 and %g1,4,%g1 ! isolate zero flag
514 xor %g1,4,%g1 ! flip zero flag
516 subcc %o1,%g1,%o1 ! conditional --$cnt
518 add %o0,%g1,%o0 ! conditional ++$out
524 .type _sparcv9_vis1_instrument_bus2,#function
525 .size _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
527 .section ".init",#alloc,#execinstr
528 call OPENSSL_cpuid_setup