Elide OPENSSL_INIT_set_config_filename() for no-stdio build
[oweals/openssl.git] / crypto / ia64cpuid.S
1 // Works on all IA-64 platforms: Linux, HP-UX, Win64i...
2 // On Win64i compile with ias.exe.
3 .text
4
// OPENSSL_cpuid_setup
//   Takes no visible inputs and produces no outputs: the body is a single
//   return through b0, so on IA-64 this entry point is a no-op.
5 .global OPENSSL_cpuid_setup#
6 .proc   OPENSSL_cpuid_setup#
7 OPENSSL_cpuid_setup:
8 { .mib; br.ret.sptk.many        b0              };;
9 .endp   OPENSSL_cpuid_setup#
10
// OPENSSL_rdtsc
//   Out: r8 = current value of ar.itc (the Interval Time Counter
//        application register).
//   Reads the counter and returns immediately; nothing else is touched.
11 .global OPENSSL_rdtsc#
12 .proc   OPENSSL_rdtsc#
13 OPENSSL_rdtsc:
14 { .mib; mov                     r8=ar.itc
15         br.ret.sptk.many        b0              };;
16 .endp   OPENSSL_rdtsc#
17
// OPENSSL_atomic_add
//   In:  r32 = address of a 4-byte word, r33 = amount to add.
//   Out: r8  = the sum that was stored, sign-extended from 32 bits.
//   Uses r2, r3, ar.ccv and p6 as scratch.
//   Implemented as a compare-and-exchange retry loop: the add is
//   recomputed until cmpxchg4 observes the same value that the sum
//   was based on.
18 .global OPENSSL_atomic_add#
19 .proc   OPENSSL_atomic_add#
20 .align  32
21 OPENSSL_atomic_add:
// Initial read of the current value.
22 { .mii; ld4             r2=[r32]
23         nop.i           0
24         nop.i           0               };;
25 .Lspin:
// ar.ccv = expected old value, r8 = candidate new value,
// r3 = copy of the expected value (r2 is overwritten by cmpxchg4 below).
26 { .mii; mov             ar.ccv=r2
27         add             r8=r2,r33
28         mov             r3=r2           };;
// Memory fence, then try to swap r8 in; r2 receives the value that was
// actually found in memory.
29 { .mmi; mf;;
30         cmpxchg4.acq    r2=[r32],r8,ar.ccv
31         nop.i           0               };;
// Observed value differs from the expected one: retry with the
// freshly observed value in r2.
32 { .mib; cmp.ne          p6,p0=r2,r3
33         nop.i           0
34 (p6)    br.dpnt         .Lspin          };;
// Success: sign-extend the 32-bit result and return it.
35 { .mib; nop.m           0
36         sxt4            r8=r8
37         br.ret.sptk.many        b0      };;
38 .endp   OPENSSL_atomic_add#
39
// OPENSSL_wipe_cpu
//   Zeroes scratch integer and floating-point registers and the
//   register backing store belonging to the frame allocated here.
//   Out: r8 = sp at the time of return,
//        r9 = address derived from ar.bsp (see the final adjustment
//             before the return bundle).
//   r2 holds the saved ar.pfs and r3 the saved ar.lc; predicates are
//   saved in r8 on entry and restored before returning.
40 // Returns a structure comprising pointer to the top of stack of
41 // the caller and pointer beyond backing storage for the current
42 // register frame. The latter is required, because it might be
43 // insufficient to wipe backing storage for the current frame
44 // (as this procedure does), one might have to go further, toward
45 // higher addresses to reach for whole "retroactively" saved
46 // context...
47 .global OPENSSL_wipe_cpu#
48 .proc   OPENSSL_wipe_cpu#
49 .align  32
50 OPENSSL_wipe_cpu:
// Unwind annotations: no memory stack frame, ar.pfs kept in r2,
// ar.lc kept in r3.
51         .prologue
52         .fframe 0
53         .save   ar.pfs,r2
54         .save   ar.lc,r3
// Allocate a frame of 96 local registers, all 96 of them rotating,
// and save the loop counter before it is reused below.
55 { .mib; alloc           r2=ar.pfs,0,96,0,96
56         mov             r3=ar.lc
57         brp.loop.imp    .L_wipe_top,.L_wipe_end-16
58                                         };;
// r9 = backing store pointer, r8 = saved predicates, loop count = 96.
59 { .mii; mov             r9=ar.bsp
60         mov             r8=pr
61         mov             ar.lc=96        };;
62         .body
// Start the store pointer at ar.bsp + 96*8 - 8 and walk downwards.
63 { .mii; add             r9=96*8-8,r9
64         mov             ar.ec=1         };;
65
66 // One can sweep double as fast, but then we can't guarantee
67 // that backing storage is wiped...
// Each pass stores one zeroed 8-byte word at [r9] (post-decrementing r9
// by 8) and clears r127/f127. br.ctop rotates the register files, so
// the same register names address a different rotating register on
// every pass.
68 .L_wipe_top:
69 { .mfi; st8             [r9]=r0,-8
70         mov             f127=f0
71         mov             r127=r0         }
72 { .mfb; nop.m           0
73         nop.f           0
74         br.ctop.sptk    .L_wipe_top     };;
75 .L_wipe_end:
76
// Clear the static registers named below: r11, r14-r31 and f6-f31.
// r2, r3, r8, r9 and r10 are not cleared here.
77 { .mfi; mov             r11=r0
78         mov             f6=f0
79         mov             r14=r0          }
80 { .mfi; mov             r15=r0
81         mov             f7=f0
82         mov             r16=r0          }
83 { .mfi; mov             r17=r0
84         mov             f8=f0
85         mov             r18=r0          }
86 { .mfi; mov             r19=r0
87         mov             f9=f0
88         mov             r20=r0          }
89 { .mfi; mov             r21=r0
90         mov             f10=f0
91         mov             r22=r0          }
92 { .mfi; mov             r23=r0
93         mov             f11=f0
94         mov             r24=r0          }
95 { .mfi; mov             r25=r0
96         mov             f12=f0
97         mov             r26=r0          }
98 { .mfi; mov             r27=r0
99         mov             f13=f0
100         mov             r28=r0          }
101 { .mfi; mov             r29=r0
102         mov             f14=f0
103         mov             r30=r0          }
104 { .mfi; mov             r31=r0
105         mov             f15=f0
106         nop.i           0               }
107 { .mfi; mov             f16=f0          }
108 { .mfi; mov             f17=f0          }
109 { .mfi; mov             f18=f0          }
110 { .mfi; mov             f19=f0          }
111 { .mfi; mov             f20=f0          }
112 { .mfi; mov             f21=f0          }
113 { .mfi; mov             f22=f0          }
114 { .mfi; mov             f23=f0          }
115 { .mfi; mov             f24=f0          }
116 { .mfi; mov             f25=f0          }
117 { .mfi; mov             f26=f0          }
118 { .mfi; mov             f27=f0          }
119 { .mfi; mov             f28=f0          }
120 { .mfi; mov             f29=f0          }
121 { .mfi; mov             f30=f0          }
// Move r9 back up past the swept area (second returned value), clear
// the last FP register and restore the predicates saved in r8 under
// mask 0x1ffff.
122 { .mfi; add             r9=96*8+8,r9
123         mov             f31=f0
124         mov             pr=r8,0x1ffff   }
// First returned value is the stack pointer; restore ar.lc and return.
125 { .mib; mov             r8=sp
126         mov             ar.lc=r3
127         br.ret.sptk     b0              };;
128 .endp   OPENSSL_wipe_cpu#
129
// OPENSSL_cleanse
//   In:  r32 = start address, r33 = length in bytes.
//   Writes zero to every byte of the range; returns nothing.
//   Uses r2, p6 and p7 as scratch; r32 and r33 are consumed.
//   Strategy: lengths below 15 are cleared byte by byte (.Little).
//   Longer ranges are first advanced byte-wise to an 8-byte boundary
//   (.Lot), then cleared 8 bytes at a time (.Laligned), and any
//   remaining tail goes back through the byte loop.
130 .global OPENSSL_cleanse#
131 .proc   OPENSSL_cleanse#
132 OPENSSL_cleanse:
// Nothing to do for a zero length. On 32-bit HP-UX the incoming
// pointer is first converted with addp4.
133 { .mib; cmp.eq          p6,p0=0,r33         // len==0
134 #if defined(_HPUX_SOURCE) && !defined(_LP64)
135         addp4           r32=0,r32
136 #endif
137 (p6)    br.ret.spnt     b0              };;
// r2 = misalignment of the pointer within 8 bytes; take the long path
// when at least 15 bytes are to be cleared.
138 { .mib; and             r2=7,r32
139         cmp.leu         p6,p0=15,r33        // len>=15
140 (p6)    br.cond.dptk    .Lot            };;
141
// Byte loop: store one zero byte, then either continue (more than one
// byte was left before this store) or return.
142 .Little:
143 { .mib; st1             [r32]=r0,1
144         cmp.ltu         p6,p7=1,r33     }  // len>1
145 { .mbb; add             r33=-1,r33         // len--
146 (p6)    br.cond.dptk    .Little
147 (p7)    br.ret.sptk.many        b0      };;
148
// Alignment loop: clear single bytes until the pointer is a multiple
// of 8; r2 is recomputed from the already advanced pointer.
149 .Lot:
150 { .mib; cmp.eq          p6,p0=0,r2
151 (p6)    br.cond.dptk    .Laligned       };;
152 { .mmi; st1             [r32]=r0,1;;
153         and             r2=7,r32        }
154 { .mib; add             r33=-1,r33
155         br              .Lot            };;
156
// Word loop: clear 8 bytes per pass. r2 is taken from the length
// before the decrement, so the loop repeats while that length,
// rounded down to a multiple of 8, exceeds 8.
157 .Laligned:
158 { .mmi; st8             [r32]=r0,8
159         and             r2=-8,r33           // len&~7
160         add             r33=-8,r33      };; // len-=8
161 { .mib; cmp.ltu         p6,p0=8,r2          // ((len+8)&~7)>8
162 (p6)    br.cond.dptk    .Laligned       };;
163
// Tail: return if nothing is left, otherwise finish with the byte loop.
164 { .mbb; cmp.eq          p6,p7=r0,r33
165 (p7)    br.cond.dpnt    .Little
166 (p6)    br.ret.sptk.many        b0      };;
167 .endp   OPENSSL_cleanse#
168
// OPENSSL_instrument_bus
//   In:  r32 = address of an array of 4-byte words, r33 = element count.
//   Out: r8  = original count minus the count remaining at exit.
//   For each element: read ar.itc, compute the tick difference since
//   the previous reading, flush the cache line holding the element
//   (fc), reload it, and add the difference to it with cmpxchg4.
//   Register roles: r2 = saved count, r9 = last tick, r10 = diff,
//   r8 = tick / value being updated, r3 = cmpxchg4 result (unused).
//   The result of cmpxchg4 is not checked, so a failed exchange is not
//   retried.
//   NOTE(review): the loop test runs after the decrement, so a count of
//   zero on entry does not exit on the first pass - presumably callers
//   never pass zero; confirm.
169 .global OPENSSL_instrument_bus#
170 .proc   OPENSSL_instrument_bus#
171 OPENSSL_instrument_bus:
// Put the count aside for the return value; on 32-bit HP-UX convert
// the incoming pointer with addp4.
172 { .mmi; mov             r2=r33
173 #if defined(_HPUX_SOURCE) && !defined(_LP64)
174         addp4           r32=0,r32
175 #endif
176                                         }
// First time stamp; the initial diff is zero.
177 { .mmi; mov             r8=ar.itc;;
178         mov             r10=r0
179         mov             r9=r8           };;
180
// Warm-up pass over the first element with diff = 0; the pointer and
// the count are not advanced here.
181 { .mmi; fc              r32;;
182         ld4             r8=[r32]        };;
183 { .mmi; mf
184         mov             ar.ccv=r8
185         add             r8=r8,r10       };;
186 { .mmi; cmpxchg4.acq    r3=[r32],r8,ar.ccv
187                                         };;
188 .Loop:
189 { .mmi; mov             r8=ar.itc;;
190         sub             r10=r8,r9               // diff=tick-lasttick
191         mov             r9=r8           };;     // lasttick=tick
// Flush the line, reload the element and add diff to it.
192 { .mmi; fc              r32;;
193         ld4             r8=[r32]        };;
194 { .mmi; mf
195         mov             ar.ccv=r8
196         add             r8=r8,r10       };;
// Store the sum, then step to the next element and count down.
197 { .mmi; cmpxchg4.acq    r3=[r32],r8,ar.ccv
198         add             r33=-1,r33
199         add             r32=4,r32       };;
// Continue while the low 32 bits of the remaining count are non-zero.
200 { .mib; cmp4.ne         p6,p0=0,r33
201 (p6)    br.cond.dptk    .Loop           };;
202
203 { .mib; sub             r8=r2,r33
204         br.ret.sptk.many        b0      };;
205 .endp   OPENSSL_instrument_bus#
206
// OPENSSL_instrument_bus2
//   In:  r32 = address of an array of 4-byte words, r33 = element count,
//        r34 = upper bound on the number of probe passes.
//   Out: r8  = original count minus the count remaining at exit.
//   Like OPENSSL_instrument_bus, but the output pointer and the count
//   only move when the measured tick difference changes from one pass
//   to the next; r34 is decremented on every pass and ends the loop
//   when it reaches zero.
//   Register roles: r2 = saved count, r9 = last tick, r10 = diff,
//   r11 = previous diff, r8 = tick / value being updated,
//   r3 = cmpxchg4 result (unused).
207 .global OPENSSL_instrument_bus2#
208 .proc   OPENSSL_instrument_bus2#
209 OPENSSL_instrument_bus2:
210 { .mmi; mov             r2=r33                  // put aside cnt
211 #if defined(_HPUX_SOURCE) && !defined(_LP64)
212         addp4           r32=0,r32
213 #endif
214                                         }
// First time stamp; the initial diff is zero.
215 { .mmi; mov             r8=ar.itc;;
216         mov             r10=r0
217         mov             r9=r8           };;
218
// Warm-up pass over the first element with diff = 0.
219 { .mmi; fc              r32;;
220         ld4             r8=[r32]        };;
221 { .mmi; mf
222         mov             ar.ccv=r8
223         add             r8=r8,r10       };;
224 { .mmi; cmpxchg4.acq    r3=[r32],r8,ar.ccv
225                                         };;
226
// Measure the first real diff before entering the loop.
227 { .mmi; mov             r8=ar.itc;;
228         sub             r10=r8,r9
229         mov             r9=r8           };;
230 .Loop2:
231 { .mmi; mov             r11=r10                 // lastdiff=diff
232         add             r34=-1,r34      };;     // --max
// Flush and reload the current element; p6 is set when the pass budget
// (low 32 bits of r34) has just reached zero.
233 { .mmi; fc              r32;;
234         ld4             r8=[r32]
235         cmp4.eq         p6,p0=0,r34     };;
236 { .mmi; mf
237         mov             ar.ccv=r8
238         add             r8=r8,r10       };;
// Add diff to the element; leave the loop if the budget ran out.
239 { .mmb; cmpxchg4.acq    r3=[r32],r8,ar.ccv
240 (p6)    br.cond.spnt    .Ldone2         };;
241
242 { .mmi; mov             r8=ar.itc;;
243         sub             r10=r8,r9               // diff=tick-lasttick
244         mov             r9=r8           };;     // lasttick=tick
245 { .mmi; cmp.ne          p6,p0=r10,r11;;         // diff!=lastdiff
246 (p6)    add             r33=-1,r33      };;     // conditional --cnt
// Loop while the low 32 bits of the remaining count are non-zero.
247 { .mib; cmp4.ne         p7,p0=0,r33
248 (p6)    add             r32=4,r32               // conditional ++out
249 (p7)    br.cond.dptk    .Loop2          };;
250 .Ldone2:
251 { .mib; sub             r8=r2,r33
252         br.ret.sptk.many        b0      };;
253 .endp   OPENSSL_instrument_bus2#