1 ! Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
3 ! Licensed under the OpenSSL license (the "License"). You may not use
4 ! this file except in compliance with the License. You can obtain a copy
5 ! in the file LICENSE in the source distribution or at
6 ! https://www.openssl.org/source/license.html
! Build-mode selection: the ABI64 macro is defined when the Sun C compiler
! targets V9 or when GCC is in 64-bit mode. The two .register directives
! below declare %g2 and %g3 as scratch registers to the assembler.
! NOTE(review): the closing of this conditional and any guard around the
! .register directives are not visible in this excerpt.
8 #if defined(__SUNPRO_C) && defined(__sparcv9)
9 # define ABI64 /* They've said -xarch=v9 at command line */
10 #elif defined(__GNUC__) && defined(__arch64__)
11 # define ABI64 /* They've said -m64 at command line */
15 .register %g2,#scratch
16 .register %g3,#scratch
! OPENSSL_wipe_cpu: wipes CPU registers, including the FP register bank,
! and returns a pointer to the top of stack of the caller.
! NOTE(review): only part of the routine is visible in this excerpt; the
! entry label and most of the register-clearing sequence are not shown.
26 .global OPENSSL_wipe_cpu
27 .type OPENSSL_wipe_cpu,#function
28 ! Keep in mind that this does not excuse us from wiping the stack!
29 ! This routine wipes registers, but not the backing store [which
30 ! resides on the stack, toward lower addresses]. To facilitate
31 ! stack wiping, it returns a pointer to the top of stack of the *caller*.
48 ! Following is V9 "rd %ccr,%o0" instruction. However! V8
49 ! specification says that it ("rd %asr2,%o0" in V8 terms) does
50 ! not cause illegal_instruction trap. It therefore can be used
51 ! to determine if the CPU the code is executing on is V8- or
52 ! V9-compliant, as V9 returns a distinct value of 0x99,
53 ! "negative" and "borrow" bits set in both %icc and %xcc.
54 .word 0x91408000 !rd %ccr,%o0
58 ! Even though we do not use %fp register bank,
59 ! we wipe it as memcpy might have used it...
! The fmovd lines copy %f0 into other FP registers.
! NOTE(review): %f0 presumably holds zero here; the instruction that
! sets it is not visible in this excerpt.
60 .word 0xbfa00040 !fmovd %f0,%f62
75 .word 0x83a00040 !fmovd %f0,%f32
133 add %fp,BIAS,%i0 ! return pointer to the top of stack of the caller
! walk_reg_wins is declared inside the extent of OPENSSL_wipe_cpu: the
! .size directive for OPENSSL_wipe_cpu comes after it.
143 .global walk_reg_wins
144 .type walk_reg_wins,#function
152 cmp %o7,0 ! compiler never cleans %o7...
153 be 1f ! could have been a leaf function...
170 add %o0,1,%i0 ! used for debugging
173 .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
! OPENSSL_atomic_add: adds %o1 to the word at [%o0] with a cas instruction
! and returns the new value as a signed int in %o0.
! NOTE(review): the argument roles are read off the cas and add lines
! below; confirm against the C prototype.
! NOTE(review): the two definitions of the yield macro are presumably
! selected by platform conditionals that are not visible in this excerpt.
175 .global OPENSSL_atomic_add
176 .type OPENSSL_atomic_add,#function
181 .word 0x95408000 !rd %ccr,%o2, see comment above
189 ! Note that you do not have to link with libthread to call thr_yield,
190 ! as libc provides a stub, which is overloaded the moment you link
191 ! with *either* libpthread or libthread...
192 #define YIELD_CPU thr_yield
194 ! applies at least to Linux and FreeBSD... Feedback expected...
195 #define YIELD_CPU sched_yield
! .spin calls the yield routine chosen above.
197 .spin: call YIELD_CPU
216 .word 0xd7e2100a !cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
219 mov %o3,%o2 ! cas always loads the memory value into its dest. register
220 add %o1,%o2,%o0 ! OpenSSL expects the new value
! The shift count is %g0, so nothing is shifted; on V9 sra still
! sign-extends the low 32 bits of the result.
222 sra %o0,%g0,%o0 ! we return signed int, remember?
223 .size OPENSSL_atomic_add,.-OPENSSL_atomic_add
! _sparcv9_rdtick: reads the %tick register into %o0 and places its upper
! 32 bits in %o1.
! NOTE(review): the rd %ccr line is presumably the V8/V9 check described
! in the comment at OPENSSL_wipe_cpu; the branch that consumes it and the
! entry label are not visible in this excerpt.
225 .global _sparcv9_rdtick
229 .word 0x91408000 !rd %ccr,%o0
233 .word 0x91410000 !rd %tick,%o0
! %o1 = %o0 >> 32 (logical, 64-bit shift).
235 .word 0x93323020 !srlx %o0,32,%o1
239 .type _sparcv9_rdtick,#function
240 .size _sparcv9_rdtick,.-_sparcv9_rdtick
! _sparcv9_vis1_probe: executes two hand-encoded VIS1 instructions, a
! load from [%o1] through ASI_FP16_P into %f0 and an fxor that clears %f0.
! NOTE(review): presumably the caller arranges to catch the trap raised on
! CPUs without VIS1; confirm against the capability detection code.
242 .global _sparcv9_vis1_probe
246 .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0
248 .word 0x81b00d80 !fxor %f0,%f0,%f0
249 .type _sparcv9_vis1_probe,#function
250 .size _sparcv9_vis1_probe,.-_sparcv9_vis1_probe
252 ! Probe and instrument a VIS1 instruction. Output is the number of cycles
253 ! it takes to execute rdtick and a pair of VIS1 instructions. The US-Tx VIS
254 ! unit is slow (documented to be 6 cycles on T2) and the core is in-order
255 ! single-issue, so it should be possible to distinguish Tx reliably...
256 ! Observed return values are:
263 ! (*) result has less to do with VIS instruction latencies; rdtick itself
264 ! appears to be that slow, but it does the trick in the sense that FP and
265 ! VIS code paths are still slower than integer-only ones.
267 ! Numbers for T2 and SPARC64 V-VII are more than welcome.
269 ! It would be possible to detect specifically US-T1 by instrumenting
270 ! fmul8ulx16, which is emulated on T1 and as such accounts for quite
271 ! a lot of %tick-s, a couple of thousand on Linux...
272 .global _sparcv9_vis1_instrument
274 _sparcv9_vis1_instrument:
! One pair of fxor instructions runs before the first sample; then five
! %tick samples are taken into %o0..%o4, with the same fxor pair between
! consecutive samples.
275 .word 0x81b00d80 !fxor %f0,%f0,%f0
276 .word 0x85b08d82 !fxor %f2,%f2,%f2
277 .word 0x91410000 !rd %tick,%o0
278 .word 0x81b00d80 !fxor %f0,%f0,%f0
279 .word 0x85b08d82 !fxor %f2,%f2,%f2
280 .word 0x93410000 !rd %tick,%o1
281 .word 0x81b00d80 !fxor %f0,%f0,%f0
282 .word 0x85b08d82 !fxor %f2,%f2,%f2
283 .word 0x95410000 !rd %tick,%o2
284 .word 0x81b00d80 !fxor %f0,%f0,%f0
285 .word 0x85b08d82 !fxor %f2,%f2,%f2
286 .word 0x97410000 !rd %tick,%o3
287 .word 0x81b00d80 !fxor %f0,%f0,%f0
288 .word 0x85b08d82 !fxor %f2,%f2,%f2
289 .word 0x99410000 !rd %tick,%o4
291 ! calculate intervals
! NOTE(review): the subtract, compare and delay-slot lines around the
! three annulled branches are not visible in this excerpt; presumably they
! reduce the four intervals to the smallest one. Confirm in the full file.
299 .word 0x38680002 !bgu,a %xcc,.+8
302 .word 0x38680002 !bgu,a %xcc,.+8
305 .word 0x38680002 !bgu,a %xcc,.+8
310 .type _sparcv9_vis1_instrument,#function
311 .size _sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
! _sparcv9_vis2_probe: executes one hand-encoded VIS2 instruction
! (bshuffle on %f0).
! NOTE(review): presumably the caller catches the trap raised on CPUs
! without VIS2; the entry label and return are not visible in this excerpt.
313 .global _sparcv9_vis2_probe
317 .word 0x81b00980 !bshuffle %f0,%f0,%f0
318 .type _sparcv9_vis2_probe,#function
319 .size _sparcv9_vis2_probe,.-_sparcv9_vis2_probe
! _sparcv9_fmadd_probe: clears %f0 and %f2 with fxor, then executes one
! hand-encoded fused multiply-add (fmaddd) on them.
! NOTE(review): presumably the caller catches the trap raised on CPUs
! without this instruction; the return sequence is not visible here.
321 .global _sparcv9_fmadd_probe
323 _sparcv9_fmadd_probe:
324 .word 0x81b00d80 !fxor %f0,%f0,%f0
325 .word 0x85b08d82 !fxor %f2,%f2,%f2
327 .word 0x81b80440 !fmaddd %f0,%f0,%f2,%f0
328 .type _sparcv9_fmadd_probe,#function
329 .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
! _sparcv9_rdcfr: reads ancillary state register 26 into %o0.
! NOTE(review): going by the routine name, %asr26 is presumably the
! compatibility feature register; confirm against the CPU manual. The
! entry label and return are not visible in this excerpt.
331 .global _sparcv9_rdcfr
335 .word 0x91468000 !rd %asr26,%o0
336 .type _sparcv9_rdcfr,#function
337 .size _sparcv9_rdcfr,.-_sparcv9_rdcfr
! _sparcv9_vis3_probe: executes one hand-encoded xmulx instruction whose
! operands and destination are all %g0, so no register is modified.
! NOTE(review): presumably the caller catches the trap raised on CPUs
! without VIS3; the entry label and return are not visible in this excerpt.
339 .global _sparcv9_vis3_probe
343 .word 0x81b022a0 !xmulx %g0,%g0,%g0
344 .type _sparcv9_vis3_probe,#function
345 .size _sparcv9_vis3_probe,.-_sparcv9_vis3_probe
! _sparcv9_random: executes the hand-encoded random instruction with %o0
! as its destination register.
! The .size expression is measured from the label of this routine, so the
! recorded symbol size covers this routine only and no longer includes the
! preceding _sparcv9_vis3_probe.
! NOTE(review): the entry label and return are not visible in this excerpt.
347 .global _sparcv9_random
351 .word 0x91b002a0 !random %o0
352 .type _sparcv9_random,#function
353 .size _sparcv9_random,.-_sparcv9_random
! _sparcv9_fjaesx_probe: executes one hand-encoded faesencx instruction
! on %f2 and %f6 with %f0 as destination.
! NOTE(review): presumably the caller catches the trap raised on CPUs
! without this extension; the return sequence is not visible here.
355 .global _sparcv9_fjaesx_probe
357 _sparcv9_fjaesx_probe:
358 .word 0x81b09206 !faesencx %f2,%f6,%f0
361 .size _sparcv9_fjaesx_probe,.-_sparcv9_fjaesx_probe
! OPENSSL_cleanse: the visible lines read %ccr into %g1, test the
! alignment of the pointer in %o0, and store zero (%g0) through it.
! NOTE(review): the length argument, the byte-wise paths and the branch
! that picks the V8 or V9 path are not visible in this excerpt.
363 .global OPENSSL_cleanse
390 ! see above for explanation
391 .word 0x83408000 !rd %ccr,%g1
! V9 path: test the low three address bits, i.e. 8-byte alignment.
397 .v9lot: andcc %o0,7,%g0
! 64-bit store of zero at [%o0].
406 .word 0xc0720000 !stx %g0,[%o0]
! Two encodings of the same loop branch, one testing %xcc and one %icc.
! NOTE(review): presumably exactly one is assembled, chosen by a 64-bit
! ABI conditional that is not visible here.
410 .word 0x126ffffd !bnz %xcc,.v9aligned
412 .word 0x124ffffd !bnz %icc,.v9aligned
! V8 path: test the low two address bits, i.e. 4-byte alignment.
422 .v8lot: andcc %o0,3,%g0
443 .type OPENSSL_cleanse,#function
444 .size OPENSSL_cleanse,.-OPENSSL_cleanse
! CRYPTO_memcmp: only the early-exit branch is visible in this excerpt.
! NOTE(review): the compare that feeds this branch is not shown;
! presumably it tests the length argument against zero.
446 .global CRYPTO_memcmp
! Branch to .Lno_data when the zero flag of %xcc is set; the pn suffix
! predicts the branch as not taken.
451 beq,pn %xcc,.Lno_data
477 .type CRYPTO_memcmp,#function
478 .size CRYPTO_memcmp,.-CRYPTO_memcmp
! _sparcv9_vis1_instrument_bus: samples %tick around a block load and
! block store pair and updates the word at [%o0] with cas.
! Register roles, as annotated on the lines below:
!   %o0 = out pointer, stepped by 4 bytes per iteration
!   %o1 = remaining cnt, %o3 = saved cnt
!   %o4 = tick, %o5 = lasttick, %g4 = diff
! NOTE(review): the setup of %g1, the loads feeding each cas, the loop
! branch and the return are not visible in this excerpt.
480 .global _sparcv9_vis1_instrument_bus
482 _sparcv9_vis1_instrument_bus:
483 mov %o1,%o3 ! save cnt
484 .word 0x99410000 !rd %tick,%o4 ! tick
485 mov %o4,%o5 ! lasttick = tick
! First pass, executed once before .Loop: block load and block store at
! [%g1], each followed by membar Sync.
489 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
490 .word 0x8143e040 !membar #Sync
491 .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
492 .word 0x8143e040 !membar #Sync
! cas on [%o0]: comparand %o4, swap value %g4.
495 .word 0xc9e2100c !cas [%o0],%o4,%g4
! Loop body: take a new tick sample, compute the difference from the
! previous sample, then repeat the block load and store and the cas.
497 .Loop: .word 0x99410000 !rd %tick,%o4
498 sub %o4,%o5,%g4 ! diff=tick-lasttick
499 mov %o4,%o5 ! lasttick=tick
502 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
503 .word 0x8143e040 !membar #Sync
504 .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
505 .word 0x8143e040 !membar #Sync
508 .word 0xc9e2100c !cas [%o0],%o4,%g4
509 subcc %o1,1,%o1 ! --$cnt
511 add %o0,4,%o0 ! ++$out
515 .type _sparcv9_vis1_instrument_bus,#function
516 .size _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
! _sparcv9_vis1_instrument_bus2: variant of the bus instrumentation that
! tracks the previous difference as well and advances the output
! conditionally.
! Register roles, as annotated on the lines below:
!   %o0 = out pointer, %o1 = cnt scaled to bytes (cnt*4), %o3 = saved cnt
!   %o2 = max, %o4 = tick, %o5 = lasttick
!   %g4 = diff, %g5 = lastdiff
! NOTE(review): the setup of %g1, the loads feeding each cas, the compare
! of diff against lastdiff, the loop branches and the return are not
! visible in this excerpt.
518 .global _sparcv9_vis1_instrument_bus2
520 _sparcv9_vis1_instrument_bus2:
521 mov %o1,%o3 ! save cnt
522 sll %o1,2,%o1 ! cnt*=4
524 .word 0x99410000 !rd %tick,%o4 ! tick
525 mov %o4,%o5 ! lasttick = tick
529 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
530 .word 0x8143e040 !membar #Sync
531 .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
532 .word 0x8143e040 !membar #Sync
! cas on [%o0]: comparand %o4, swap value %g4.
535 .word 0xc9e2100c !cas [%o0],%o4,%g4
537 .word 0x99410000 !rd %tick,%o4 ! tick
538 sub %o4,%o5,%g4 ! diff=tick-lasttick
539 mov %o4,%o5 ! lasttick=tick
540 mov %g4,%g5 ! lastdiff=diff
543 .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
544 .word 0x8143e040 !membar #Sync
545 .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
546 .word 0x8143e040 !membar #Sync
549 .word 0xc9e2100c !cas [%o0],%o4,%g4
551 subcc %o2,1,%o2 ! --max
555 .word 0x99410000 !rd %tick,%o4 ! tick
556 sub %o4,%o5,%g4 ! diff=tick-lasttick
557 mov %o4,%o5 ! lasttick=tick
559 mov %g4,%g5 ! lastdiff=diff
! Turn the zero flag into a step of 0 or 4: read %ccr, keep bit 2 (the
! icc zero flag) and invert it. %g1 is 4 when the preceding flag-setting
! result was non-zero and 0 when it was zero. That step is then used as
! both the decrement of cnt and the increment of the out pointer.
561 .word 0x83408000 !rd %ccr,%g1
562 and %g1,4,%g1 ! isolate zero flag
563 xor %g1,4,%g1 ! flip zero flag
565 subcc %o1,%g1,%o1 ! conditional --$cnt
567 add %o0,%g1,%o0 ! conditional ++$out
573 .type _sparcv9_vis1_instrument_bus2,#function
574 .size _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
! Emit a call to OPENSSL_cpuid_setup into the allocated, executable .init
! section.
! NOTE(review): presumably this makes the setup routine run as part of
! ELF initialization; anything after the call is not visible in this excerpt.
576 .section ".init",#alloc,#execinstr
577 call OPENSSL_cpuid_setup