1 #ifdef OPENSSL_FIPSCANISTER
2 #include <openssl/fipssyms.h>
5 #if defined(__SUNPRO_C) && defined(__sparcv9)
6 # define ABI64 /* They've said -xarch=v9 at command line */
7 #elif defined(__GNUC__) && defined(__arch64__)
8 # define ABI64 /* They've said -m64 at command line */
12 .register %g2,#scratch
13 .register %g3,#scratch
23 .global OPENSSL_wipe_cpu
24 .type OPENSSL_wipe_cpu,#function
25 ! Keep in mind that this does not excuse us from wiping the stack!
26 ! This routine wipes registers, but not the backing store [which
27 ! resides on the stack, toward lower addresses]. To facilitate
28 ! stack wiping, it returns a pointer to the top of stack of the *caller*.
45 ! The following is the V9 "rd %ccr,%o0" instruction. However, the V8
46 ! specification says that it ("rd %asr2,%o0" in V8 terms) does
47 ! not cause an illegal_instruction trap. It can therefore be used
48 ! to determine if the CPU the code is executing on is V8- or
49 ! V9-compliant, as V9 returns a distinct value of 0x99, i.e. the
50 ! "negative" and "borrow" bits set in both %icc and %xcc.
51 .word 0x91408000 !rd %ccr,%o0
55 ! Even though we do not use the %fp register bank,
56 ! we wipe it, as memcpy might have used it...
57 .word 0xbfa00040 !fmovd %f0,%f62
72 .word 0x83a00040 !fmovd %f0,%f32
130 add %fp,BIAS,%i0 ! return pointer to the top of stack of the caller
140 .global walk_reg_wins
141 .type walk_reg_wins,#function
149 cmp %o7,0 ! compiler never cleans %o7...
150 be 1f ! could have been a leaf function...
167 add %o0,1,%i0 ! used for debugging
170 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
172 .global OPENSSL_atomic_add
173 .type OPENSSL_atomic_add,#function
178 .word 0x95408000 !rd %ccr,%o2, see comment above
186 ! Note that you do not have to link with libthread to call thr_yield,
187 ! as libc provides a stub, which is overloaded the moment you link
188 ! with *either* libpthread or libthread...
189 #define YIELD_CPU thr_yield
191 ! applies at least to Linux and FreeBSD... Feedback expected...
192 #define YIELD_CPU sched_yield
194 .spin: call YIELD_CPU
213 .word 0xd7e2100a !cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
216 mov %o3,%o2 ! cas is always fetching to dest. register
217 add %o1,%o2,%o0 ! OpenSSL expects the new value
219 sra %o0,%g0,%o0 ! we return signed int, remember?
220 .size OPENSSL_atomic_add,.-OPENSSL_atomic_add
222 .global _sparcv9_rdtick
226 .word 0x91408000 !rd %ccr,%o0
230 .word 0x91410000 !rd %tick,%o0
232 .word 0x93323020 !srlx %o0,32,%o1
236 .type _sparcv9_rdtick,#function
237 .size _sparcv9_rdtick,.-_sparcv9_rdtick
239 .global _sparcv9_vis1_probe
243 .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0
245 .word 0x81b00d80 !fxor %f0,%f0,%f0
246 .type _sparcv9_vis1_probe,#function
247 .size _sparcv9_vis1_probe,.-_sparcv9_vis1_probe
249 ! Probe and instrument a VIS1 instruction. Output is the number of cycles
250 ! it takes to execute rdtick and a pair of VIS1 instructions. The US-Tx VIS
251 ! unit is slow (documented to be 6 cycles on T2) and the core is in-order
252 ! single-issue, so it should be possible to distinguish Tx reliably...
253 ! Observed return values are:
260 ! (*) result has less to do with VIS instruction latencies; rdtick
261 ! itself appears to be that slow, but it does the trick in the sense
262 ! that FP and VIS code paths are still slower than integer-only ones.
264 ! Numbers for T2 and SPARC64 V-VII are more than welcome.
266 ! It would be possible to detect specifically US-T1 by instrumenting
267 ! fmul8ulx16, which is emulated on T1 and as such accounts for quite
268 ! a lot of %tick-s, couple of thousand on Linux...
269 .global _sparcv9_vis1_instrument
271 _sparcv9_vis1_instrument:
272 .word 0x81b00d80 !fxor %f0,%f0,%f0
273 .word 0x85b08d82 !fxor %f2,%f2,%f2
274 .word 0x91410000 !rd %tick,%o0
275 .word 0x81b00d80 !fxor %f0,%f0,%f0
276 .word 0x85b08d82 !fxor %f2,%f2,%f2
277 .word 0x93410000 !rd %tick,%o1
278 .word 0x81b00d80 !fxor %f0,%f0,%f0
279 .word 0x85b08d82 !fxor %f2,%f2,%f2
280 .word 0x95410000 !rd %tick,%o2
281 .word 0x81b00d80 !fxor %f0,%f0,%f0
282 .word 0x85b08d82 !fxor %f2,%f2,%f2
283 .word 0x97410000 !rd %tick,%o3
284 .word 0x81b00d80 !fxor %f0,%f0,%f0
285 .word 0x85b08d82 !fxor %f2,%f2,%f2
286 .word 0x99410000 !rd %tick,%o4
288 ! calculate intervals
296 .word 0x38680002 !bgu,a %xcc,.+8
299 .word 0x38680002 !bgu,a %xcc,.+8
302 .word 0x38680002 !bgu,a %xcc,.+8
307 .type _sparcv9_vis1_instrument,#function
308 .size _sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
310 .global _sparcv9_vis2_probe
314 .word 0x81b00980 !bshuffle %f0,%f0,%f0
315 .type _sparcv9_vis2_probe,#function
316 .size _sparcv9_vis2_probe,.-_sparcv9_vis2_probe
318 .global _sparcv9_fmadd_probe
320 _sparcv9_fmadd_probe:
321 .word 0x81b00d80 !fxor %f0,%f0,%f0
322 .word 0x85b08d82 !fxor %f2,%f2,%f2
324 .word 0x81b80440 !fmaddd %f0,%f0,%f2,%f0
325 .type _sparcv9_fmadd_probe,#function
326 .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
328 .global _sparcv9_rdcfr
332 .word 0x91468000 !rd %asr26,%o0
333 .type _sparcv9_rdcfr,#function
334 .size _sparcv9_rdcfr,.-_sparcv9_rdcfr
336 .global _sparcv9_vis3_probe
340 .word 0x81b022a0 !xmulx %g0,%g0,%g0
341 .type _sparcv9_vis3_probe,#function
342 .size _sparcv9_vis3_probe,.-_sparcv9_vis3_probe
344 .global _sparcv9_random ! exported entry symbol; the routine contains the random instruction below
348 .word 0x91b002a0 !random %o0 (hand-encoded as a raw instruction word)
349 .type _sparcv9_random,#function
350 .size _sparcv9_random,.-_sparcv9_random ! symbol size is measured from the entry label of this routine
352 .global OPENSSL_cleanse
379 ! see above for explanation
380 .word 0x83408000 !rd %ccr,%g1
386 .v9lot: andcc %o0,7,%g0
395 .word 0xc0720000 !stx %g0,[%o0]
399 .word 0x126ffffd !bnz %xcc,.v9aligned
401 .word 0x124ffffd !bnz %icc,.v9aligned
411 .v8lot: andcc %o0,3,%g0
432 .type OPENSSL_cleanse,#function
433 .size OPENSSL_cleanse,.-OPENSSL_cleanse
435 .global _sparcv9_vis1_instrument_bus
437 _sparcv9_vis1_instrument_bus:
438 mov %o1,%o3 ! save cnt
439 .word 0x99410000 !rd %tick,%o4 ! tick
440 mov %o4,%o5 ! lasttick = tick
444 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
445 .word 0x8143e040 !membar #Sync
446 .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
447 .word 0x8143e040 !membar #Sync
450 .word 0xc9e2100c !cas [%o0],%o4,%g4
452 .Loop: .word 0x99410000 !rd %tick,%o4
453 sub %o4,%o5,%g4 ! diff=tick-lasttick
454 mov %o4,%o5 ! lasttick=tick
457 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
458 .word 0x8143e040 !membar #Sync
459 .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
460 .word 0x8143e040 !membar #Sync
463 .word 0xc9e2100c !cas [%o0],%o4,%g4
464 subcc %o1,1,%o1 ! --$cnt
466 add %o0,4,%o0 ! ++$out
470 .type _sparcv9_vis1_instrument_bus,#function
471 .size _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
473 .global _sparcv9_vis1_instrument_bus2
475 _sparcv9_vis1_instrument_bus2:
476 mov %o1,%o3 ! save cnt
477 sll %o1,2,%o1 ! cnt*=4
479 .word 0x99410000 !rd %tick,%o4 ! tick
480 mov %o4,%o5 ! lasttick = tick
484 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
485 .word 0x8143e040 !membar #Sync
486 .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
487 .word 0x8143e040 !membar #Sync
490 .word 0xc9e2100c !cas [%o0],%o4,%g4
492 .word 0x99410000 !rd %tick,%o4 ! tick
493 sub %o4,%o5,%g4 ! diff=tick-lasttick
494 mov %o4,%o5 ! lasttick=tick
495 mov %g4,%g5 ! lastdiff=diff
498 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
499 .word 0x8143e040 !membar #Sync
500 .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
501 .word 0x8143e040 !membar #Sync
504 .word 0xc9e2100c !cas [%o0],%o4,%g4
506 subcc %o2,1,%o2 ! --max
510 .word 0x99410000 !rd %tick,%o4 ! tick
511 sub %o4,%o5,%g4 ! diff=tick-lasttick
512 mov %o4,%o5 ! lasttick=tick
514 mov %g4,%g5 ! lastdiff=diff
516 .word 0x83408000 !rd %ccr,%g1
517 and %g1,4,%g1 ! isolate zero flag
518 xor %g1,4,%g1 ! flip zero flag
520 subcc %o1,%g1,%o1 ! conditional --$cnt
522 add %o0,%g1,%o0 ! conditional ++$out
528 .type _sparcv9_vis1_instrument_bus2,#function
529 .size _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
531 .section ".init",#alloc,#execinstr
532 call OPENSSL_cpuid_setup