2 # I have this in perl so I can use more usefull register names and then convert
3 # them into alpha registers.
34 # The bn_div_words is actually gcc output but the other parts are hand done.
35 # Thanks to tzeruch@ceddec.com for sending me the gcc output for
37 # I've gone back and re-done most of routines.
38 # The key thing to remeber for the 164 CPU is that while a
39 # multiply operation takes 8 cycles, another one can only be issued
40 # after 4 cycles have elapsed. I've done modification to help
41 # improve this. Also, normally, a ld instruction will not be available
49 .globl bn_mul_add_words
58 blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
63 mulq $A1,$19,$L1 # 1 2 1 ######
66 umulh $A1,$19,$A1 # 1 2 ######
69 mulq $A2,$19,$L2 # 2 2 1 ######
71 addq $R1,$L1,$R1 # 1 2 2
73 umulh $A2,$19,$A2 # 2 2 ######
74 cmpult $R1,$L1,$O1 # 1 2 3 1
75 addq $A1,$O1,$A1 # 1 3 1
76 addq $R1,$CC,$R1 # 1 2 3 1
77 mulq $A3,$19,$L3 # 3 2 1 ######
78 cmpult $R1,$CC,$CC # 1 2 3 2
79 addq $R2,$L2,$R2 # 2 2 2
80 addq $A1,$CC,$CC # 1 3 2
81 cmpult $R2,$L2,$O2 # 2 2 3 1
82 addq $A2,$O2,$A2 # 2 3 1
83 umulh $A3,$19,$A3 # 3 2 ######
84 addq $R2,$CC,$R2 # 2 2 3 1
85 cmpult $R2,$CC,$CC # 2 2 3 2
87 mulq $A4,$19,$L4 # 4 2 1 ######
88 addq $A2,$CC,$CC # 2 3 2
89 addq $R3,$L3,$R3 # 3 2 2
91 cmpult $R3,$L3,$O3 # 3 2 3 1
92 stq $R1,-32($16) # 1 2 4
93 umulh $A4,$19,$A4 # 4 2 ######
94 addq $A3,$O3,$A3 # 3 3 1
95 addq $R3,$CC,$R3 # 3 2 3 1
96 stq $R2,-24($16) # 2 2 4
97 cmpult $R3,$CC,$CC # 3 2 3 2
98 stq $R3,-16($16) # 3 2 4
99 addq $R4,$L4,$R4 # 4 2 2
100 addq $A3,$CC,$CC # 3 3 2
101 cmpult $R4,$L4,$O4 # 4 2 3 1
103 addq $A4,$O4,$A4 # 4 3 1
104 addq $R4,$CC,$R4 # 4 2 3 1
105 cmpult $R4,$CC,$CC # 4 2 3 2
106 stq $R4,-8($16) # 4 2 4
107 addq $A4,$CC,$CC # 4 3 2
119 mulq $A1,$19,$L1 # 4 2 1
123 umulh $A1,$19,$A1 # 4 2
124 addq $R1,$L1,$R1 # 4 2 2
125 cmpult $R1,$L1,$O1 # 4 2 3 1
126 addq $A1,$O1,$A1 # 4 3 1
127 addq $R1,$CC,$R1 # 4 2 3 1
128 cmpult $R1,$CC,$CC # 4 2 3 2
129 addq $A1,$CC,$CC # 4 3 2
130 stq $R1,-8($16) # 4 2 4
132 ret $31,($26),1 # else exit
137 bgt $18,$45 # goto tail code
138 ret $31,($26),1 # else exit
140 .end bn_mul_add_words
151 blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
156 mulq $A1,$19,$L1 # 1 2 1 #####
158 ldq $A3,16($17) # 3 1
159 umulh $A1,$19,$A1 # 1 2 #####
160 ldq $A4,24($17) # 4 1
161 mulq $A2,$19,$L2 # 2 2 1 #####
162 addq $L1,$CC,$L1 # 1 2 3 1
164 cmpult $L1,$CC,$CC # 1 2 3 2
165 umulh $A2,$19,$A2 # 2 2 #####
166 addq $A1,$CC,$CC # 1 3 2
168 addq $L2,$CC,$L2 # 2 2 3 1
169 mulq $A3,$19,$L3 # 3 2 1 #####
170 cmpult $L2,$CC,$CC # 2 2 3 2
171 addq $A2,$CC,$CC # 2 3 2
173 umulh $A3,$19,$A3 # 3 2 #####
174 stq $L1,-32($16) # 1 2 4
175 mulq $A4,$19,$L4 # 4 2 1 #####
176 addq $L3,$CC,$L3 # 3 2 3 1
177 stq $L2,-24($16) # 2 2 4
178 cmpult $L3,$CC,$CC # 3 2 3 2
179 umulh $A4,$19,$A4 # 4 2 #####
180 addq $A3,$CC,$CC # 3 3 2
181 stq $L3,-16($16) # 3 2 4
182 addq $L4,$CC,$L4 # 4 2 3 1
183 cmpult $L4,$CC,$CC # 4 2 3 2
185 addq $A4,$CC,$CC # 4 3 2
187 stq $L4,-8($16) # 4 2 4
198 mulq $A1,$19,$L1 # 4 2 1
200 umulh $A1,$19,$A1 # 4 2
201 addq $L1,$CC,$L1 # 4 2 3 1
203 cmpult $L1,$CC,$CC # 4 2 3 2
205 addq $A1,$CC,$CC # 4 3 2
206 stq $L1,-8($16) # 4 2 4
209 ret $31,($26),1 # else exit
214 bgt $18,$145 # goto tail code
215 ret $31,($26),1 # else exit
227 blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
231 mulq $A1,$A1,$L1 ######
234 umulh $A1,$A1,$R1 ######
235 ldq $A3,16($17) # 1 1
236 mulq $A2,$A2,$L2 ######
237 ldq $A4,24($17) # 1 1
238 stq $L1,0($16) # r[0]
239 umulh $A2,$A2,$R2 ######
240 stq $R1,8($16) # r[1]
241 mulq $A3,$A3,$L3 ######
242 stq $L2,16($16) # r[0]
243 umulh $A3,$A3,$R3 ######
244 stq $R2,24($16) # r[1]
245 mulq $A4,$A4,$L4 ######
246 stq $L3,32($16) # r[0]
247 umulh $A4,$A4,$R4 ######
248 stq $R3,40($16) # r[1]
252 stq $L4,-16($16) # r[0]
253 stq $R4,-8($16) # r[1]
260 ldq $A1,0($17) # a[0]
261 mulq $A1,$A1,$L1 # a[0]*w low part r2
265 umulh $A1,$A1,$R1 # a[0]*w high part r3
266 stq $L1,-16($16) # r[0]
267 stq $R1,-8($16) # r[1]
270 ret $31,($26),1 # else exit
275 bgt $18,$442 # goto tail code
276 ret $31,($26),1 # else exit
288 bis $31,$31,$CC # carry = 0
290 ldq $L1,0($17) # a[0]
291 ldq $R1,0($18) # b[1]
294 addq $R1,$L1,$R1 # r=a+b;
295 ldq $L2,8($17) # a[1]
296 cmpult $R1,$L1,$O1 # did we overflow?
297 ldq $R2,8($18) # b[1]
298 addq $R1,$CC,$R1 # c+= overflow
299 ldq $L3,16($17) # a[2]
300 cmpult $R1,$CC,$CC # overflow?
301 ldq $R3,16($18) # b[2]
303 ldq $L4,24($17) # a[3]
304 addq $R2,$L2,$R2 # r=a+b;
305 ldq $R4,24($18) # b[3]
306 cmpult $R2,$L2,$O2 # did we overflow?
307 addq $R3,$L3,$R3 # r=a+b;
308 addq $R2,$CC,$R2 # c+= overflow
309 cmpult $R3,$L3,$O3 # did we overflow?
310 cmpult $R2,$CC,$CC # overflow?
311 addq $R4,$L4,$R4 # r=a+b;
313 cmpult $R4,$L4,$O4 # did we overflow?
314 addq $R3,$CC,$R3 # c+= overflow
315 stq $R1,0($16) # r[0]=c
316 cmpult $R3,$CC,$CC # overflow?
317 stq $R2,8($16) # r[1]=c
319 stq $R3,16($16) # r[2]=c
320 addq $R4,$CC,$R4 # c+= overflow
321 subq $19,4,$19 # loop--
322 cmpult $R4,$CC,$CC # overflow?
323 addq $17,32,$17 # a++
325 stq $R4,24($16) # r[3]=c
326 addq $18,32,$18 # b++
327 addq $16,32,$16 # r++
330 ldq $L1,0($17) # a[0]
331 ldq $R1,0($18) # b[1]
335 ldq $L1,0($17) # a[0]
336 ldq $R1,0($18) # b[1]
337 addq $R1,$L1,$R1 # r=a+b;
338 subq $19,1,$19 # loop--
339 addq $R1,$CC,$R1 # c+= overflow
341 cmpult $R1,$L1,$O1 # did we overflow?
342 cmpult $R1,$CC,$CC # overflow?
344 stq $R1,0($16) # r[0]=c
349 ret $31,($26),1 # else exit
353 bgt $19,$945 # goto tail code
354 ret $31,($26),1 # else exit
366 bis $31,$31,$CC # carry = 0
369 ldq $L1,0($17) # a[0]
370 ldq $R1,0($18) # b[1]
373 addq $R1,$L1,$R1 # r=a+b;
374 ldq $L2,8($17) # a[1]
375 cmpult $R1,$L1,$O1 # did we overflow?
376 ldq $R2,8($18) # b[1]
377 addq $R1,$CC,$R1 # c+= overflow
378 ldq $L3,16($17) # a[2]
379 cmpult $R1,$CC,$CC # overflow?
380 ldq $R3,16($18) # b[2]
382 ldq $L4,24($17) # a[3]
383 addq $R2,$L2,$R2 # r=a+b;
384 ldq $R4,24($18) # b[3]
385 cmpult $R2,$L2,$O2 # did we overflow?
386 addq $R3,$L3,$R3 # r=a+b;
387 addq $R2,$CC,$R2 # c+= overflow
388 cmpult $R3,$L3,$O3 # did we overflow?
389 cmpult $R2,$CC,$CC # overflow?
390 addq $R4,$L4,$R4 # r=a+b;
392 cmpult $R4,$L4,$O4 # did we overflow?
393 addq $R3,$CC,$R3 # c+= overflow
394 stq $R1,0($16) # r[0]=c
395 cmpult $R3,$CC,$CC # overflow?
396 stq $R2,8($16) # r[1]=c
398 stq $R3,16($16) # r[2]=c
399 addq $R4,$CC,$R4 # c+= overflow
400 subq $19,4,$19 # loop--
401 cmpult $R4,$CC,$CC # overflow?
402 addq $17,32,$17 # a++
404 stq $R4,24($16) # r[3]=c
405 addq $18,32,$18 # b++
406 addq $16,32,$16 # r++
409 ldq $L1,0($17) # a[0]
410 ldq $R1,0($18) # b[1]
414 ldq $L1,0($17) # a[0]
415 ldq $R1,0($18) # b[1]
416 cmpult $L1,$R1,$O1 # will we borrow?
417 subq $L1,$R1,$R1 # r=a-b;
418 subq $19,1,$19 # loop--
419 cmpult $R1,$CC,$O2 # will we borrow?
420 subq $R1,$CC,$R1 # c+= overflow
423 stq $R1,0($16) # r[0]=c
428 ret $31,($26),1 # else exit
432 bgt $19,$845 # goto tail code
433 ret $31,($26),1 # else exit
437 # What follows was taken directly from the C compiler with a few
438 # hacks to redo the lables.
468 jsr $26,BN_num_bits_word
476 # lda $16,_IO_stderr_