1 // Works on all IA-64 platforms: Linux, HP-UX, Win64i...
2 // On Win64i compile with ias.exe.
5 #if defined(_HPUX_SOURCE) && !defined(_LP64)
// OPENSSL_cpuid_setup: exported procedure whose only instruction here
// is an immediate return to the caller through branch register b0.
11 .global OPENSSL_cpuid_setup#
12 .proc OPENSSL_cpuid_setup#
14 { .mib; br.ret.sptk.many b0 };;
15 .endp OPENSSL_cpuid_setup#
// OPENSSL_rdtsc: exported symbol; the bundle tail below returns to the
// caller through b0.
// NOTE(review): the instruction that produces the return value is not
// adjacent to this comment - confirm what is left in r8 on return.
17 .global OPENSSL_rdtsc#
21 br.ret.sptk.many b0 };;
// OPENSSL_atomic_add: exported procedure built around a 4-byte
// compare-and-exchange retry loop on the word addressed by r32.
24 .global OPENSSL_atomic_add#
25 .proc OPENSSL_atomic_add#
// Store r8 into the 4-byte word at [r32] only if that word still
// equals ar.ccv; r2 receives the value that was found in memory.
36 cmpxchg4.acq r2=[r32],r8,ar.ccv
// r2 != r3 means the word seen by the exchange is not the one in r3
// (presumably the previously loaded value - TODO confirm against the
// setup code), so branch back to .Lspin and try again.
38 { .mib; cmp.ne p6,p0=r2,r3
40 (p6) br.dpnt .Lspin };;
// Exchange succeeded: return to the caller.
43 br.ret.sptk.many b0 };;
44 .endp OPENSSL_atomic_add#
46 // Returns a structure comprising a pointer to the top of the
47 // caller's stack and a pointer beyond the backing storage for the
48 // current register frame. The latter is required because it might
49 // be insufficient to wipe the backing storage for the current frame
50 // (as this procedure does); one might have to go further, toward
51 // higher addresses, to reach for whole "retroactively" saved
53 .global OPENSSL_wipe_cpu#
54 .proc OPENSSL_wipe_cpu#
// Allocate a frame of 96 local registers, all 96 of them rotating
// (no inputs, no outputs); the previous ar.pfs is kept in r2.
61 { .mib; alloc r2=ar.pfs,0,96,0,96
// Branch-prediction hint for the wipe loop below.
63 brp.loop.imp .L_wipe_top,.L_wipe_end-16
// Advance r9 to the last of 96 eight-byte slots above its current
// value, so the loop can walk downwards.
// NOTE(review): r9 presumably holds the backing store pointer at this
// point - confirm against the instruction that initialises it.
69 { .mii; add r9=96*8-8,r9
72 // One can sweep double as fast, but then we can't guarantee
73 // that backing storage is wiped...
// Store 8 zero bytes (r0) at [r9], then step r9 down by 8.
75 { .mfi; st8 [r9]=r0,-8
// Counted loop branch; it also rotates the register frame on every
// iteration it takes.
80 br.ctop.sptk .L_wipe_top };;
// Move r9 up by 96*8+8 bytes from where the descending loop left it.
128 { .mfi; add r9=96*8+8,r9
134 .endp OPENSSL_wipe_cpu#
// OPENSSL_cleanse: overwrites memory with zero bytes.
//   r32 - address being written (advanced by post-increment stores)
//   r33 - remaining length in bytes ("len" in the comments below)
// Buffers shorter than 15 bytes are cleared one byte at a time; longer
// ones branch to .Lot, where single bytes are stored until the
// alignment test passes, then 8 bytes per store, then any tail goes
// back through the byte loop.
136 .global OPENSSL_cleanse#
137 .proc OPENSSL_cleanse#
// Nothing to do for an empty buffer.
139 { .mib; cmp.eq p6,p0=0,r33 // len==0
141 (p6) br.ret.spnt b0 };;
// Unsigned compare: take the long-buffer path when 15 <= len.
143 cmp.leu p6,p0=15,r33 // len>=15
144 (p6) br.cond.dptk .Lot };;
// Byte loop: store one zero byte and advance the pointer by 1.
147 { .mib; st1 [r32]=r0,1
148 cmp.ltu p6,p7=1,r33 } // len>1
// There is no stop between the compare above and this decrement, so
// the compare sees len before it is decremented: p6 = more bytes
// remain (loop again), p7 = that was the last byte (return).
149 { .mbb; add r33=-1,r33 // len--
150 (p6) br.cond.dptk .Little
151 (p7) br.ret.sptk.many b0 };;
// r2 == 0 switches to the 8-byte loop.
// NOTE(review): r2 is presumably the low three bits of the address at
// this point - confirm against the instruction that computes it.
154 { .mib; cmp.eq p6,p0=0,r2
155 (p6) br.cond.dptk .Laligned };;
// Otherwise clear one byte, advance the pointer and count it off.
156 { .mmi; st1 [r32]=r0,1;;
158 { .mib; add r33=-1,r33
// 8-byte loop: store 8 zero bytes and advance the pointer by 8. r2 is
// computed from len before the subtraction, as both share one
// instruction group.
162 { .mmi; st8 [r32]=r0,8
163 and r2=-8,r33 // len&~7
164 add r33=-8,r33 };; // len-=8
// Keep going while more than 8 whole-word bytes were left.
165 { .mib; cmp.ltu p6,p0=8,r2 // ((len+8)&~7)>8
166 (p6) br.cond.dptk .Laligned };;
// Tail: return if nothing is left, otherwise finish the remaining
// bytes in the byte loop.
168 { .mbb; cmp.eq p6,p7=r0,r33
169 (p7) br.cond.dpnt .Little
170 (p6) br.ret.sptk.many b0 };;
171 .endp OPENSSL_cleanse#
// CRYPTO_memcmp: byte-wise comparison of two buffers that XORs every
// pair of bytes in a counted loop instead of branching on a mismatch.
//   r32, r33 - addresses of the two buffers
//   r34      - length in bytes ("len" in the comment below)
173 .global CRYPTO_memcmp#
// Return straight away for a zero length.
180 cmp.eq p6,p0=0,r34 // len==0?
181 (p6) br.ret.spnt b0 };;
// Frame of 3 inputs + 5 locals with all 8 registers rotating; the
// previous ar.pfs is kept in r2.
183 { .mib; alloc r2=ar.pfs,3,5,0,8
// Branch-prediction hint for the compare loop.
186 brp.loop.imp .Loop_cmp_ctop,.Loop_cmp_cend-16
// r10 = r34 - r0 - 1, i.e. len-1.
188 { .mib; sub r10=r34,r0,1
// Copy both addresses into r16/r17, which lie outside the rotating
// region, so the loop can overwrite r32 onwards.
// NOTE(review): ADDP is a macro defined elsewhere in the file -
// confirm which add instruction it expands to on each platform.
191 { .mii; ADDP r16=0,r32
194 { .mib; ADDP r17=0,r33
// Software-pipelined loop. Stage p16 loads one byte from each buffer
// into r32 and r36 and advances the pointers; each taken br.ctop
// rotates the registers by one, so two iterations later the same
// bytes are named r34 and r38, where stage p18 XORs them.
198 { .mib; (p16) ld1 r32=[r16],1
199 (p18) xor r34=r34,r38 }
200 { .mib; (p16) ld1 r36=[r17],1
202 br.ctop.sptk .Loop_cmp_ctop };;
// p6 = (r8 != 0).
// NOTE(review): r8 presumably accumulates the XOR results - confirm
// against the instruction that writes it.
205 { .mib; cmp.ne p6,p0=0,r8
210 br.ret.sptk.many b0 };;
// OPENSSL_instrument_bus: samples the interval time counter (ar.itc)
// in a loop and performs a 4-byte compare-and-exchange on the word at
// [r32] each time round. The loop runs until the low 32 bits of r33
// reach zero; the return value is r2 - r33.
213 .global OPENSSL_instrument_bus#
214 .proc OPENSSL_instrument_bus#
215 OPENSSL_instrument_bus:
// Initial tick sample.
218 { .mmi; mov r8=ar.itc;;
// Store r8 into [r32] only if the word still equals ar.ccv; r3
// receives the value found in memory.
227 { .mmi; cmpxchg4.acq r3=[r32],r8,ar.ccv
// New tick sample; r10 = time elapsed since the previous sample, and
// r9 then remembers this sample for the next round.
230 { .mmi; mov r8=ar.itc;;
231 sub r10=r8,r9 // diff=tick-lasttick
232 mov r9=r8 };; // lasttick=tick
238 { .mmi; cmpxchg4.acq r3=[r32],r8,ar.ccv
// 32-bit compare: loop again while r33 is non-zero.
241 { .mib; cmp4.ne p6,p0=0,r33
242 (p6) br.cond.dptk .Loop };;
// Return r2 - r33.
// NOTE(review): r2 presumably holds the count as passed in - confirm
// against the instruction that sets it.
244 { .mib; sub r8=r2,r33
245 br.ret.sptk.many b0 };;
246 .endp OPENSSL_instrument_bus#
// OPENSSL_instrument_bus2: like a tick-sampling loop over ar.itc with
// a 4-byte compare-and-exchange on [r32], but the output pointer and
// the remaining count only move when the measured tick difference
// changes, and the loop is additionally bounded by a "max" counter.
//   r32 - output pointer ("out"), advanced by 4 per recorded sample
//   r33 - remaining count ("cnt")
//   r34 - remaining iteration budget ("max")
// Returns the original cnt minus the cnt still remaining.
248 .global OPENSSL_instrument_bus2#
249 .proc OPENSSL_instrument_bus2#
250 OPENSSL_instrument_bus2:
251 { .mmi; mov r2=r33 // put aside cnt
// Initial tick sample.
253 { .mmi; mov r8=ar.itc;;
// Store r8 into [r32] only if the word still equals ar.ccv; r3
// receives the value found in memory.
262 { .mmi; cmpxchg4.acq r3=[r32],r8,ar.ccv
265 { .mmi; mov r8=ar.itc;;
269 { .mmi; mov r11=r10 // lastdiff=diff
270 add r34=-1,r34 };; // --max
// 32-bit compare: p6 is set once the iteration budget hits zero.
273 cmp4.eq p6,p0=0,r34 };;
277 { .mmb; cmpxchg4.acq r3=[r32],r8,ar.ccv
// Budget exhausted: leave the loop.
278 (p6) br.cond.spnt .Ldone2 };;
// New tick sample; r10 = time elapsed since the previous sample.
280 { .mmi; mov r8=ar.itc;;
281 sub r10=r8,r9 // diff=tick-lasttick
282 mov r9=r8 };; // lasttick=tick
// Only a change in the difference consumes a count and an output slot.
283 { .mmi; cmp.ne p6,p0=r10,r11;; // diff!=lastdiff
284 (p6) add r33=-1,r33 };; // conditional --cnt
// 32-bit compare: loop again while cnt is non-zero.
285 { .mib; cmp4.ne p7,p0=0,r33
286 (p6) add r32=4,r32 // conditional ++out
287 (p7) br.cond.dptk .Loop2 };;
// Return value: cnt put aside on entry minus cnt remaining.
289 { .mib; sub r8=r2,r33
290 br.ret.sptk.many b0 };;
291 .endp OPENSSL_instrument_bus2#