2 # The bn_div_words is actually gcc output but the other parts are hand done.
3 # Thanks to tzeruch@ceddec.com for sending me the gcc output for
5 # I've gone back and re-done most of the routines.
6 # The key thing to remember for the 164 CPU is that while a
7 # multiply operation takes 8 cycles, another one can only be issued
8 # after 4 cycles have elapsed. I've made modifications to help
9 # improve this. Also, normally, a ld instruction will not be available
# ---------------------------------------------------------------------
# bn_mul_add_words -- excerpt.  Only part of the routine is visible in
# this view: the entry label, the loads, the pointer/counter updates and
# the branch-target labels ($43, $45) are on lines that are not shown.
#
# Register roles that can be read off the visible instructions:
#   $16              base register of every result store (stq ..($16))
#   $18              count tested by blt/bgt
#   $19              multiplier: second operand of every mulq/umulh
#   $0               running carry word, recomputed once per word
#   $1-$4            result words 1..4, stored through $16
#   $5-$8            low halves of the products (mulq results)
#   $20,$21,$27,$28  multiplicand words, overwritten with the high
#                    halves of their products (umulh results)
#   $22-$25          one-bit carries produced by cmpult
#
# Carry chain per word (word 1 shown, words 2-4 follow the same shape):
#   cmpult $1,$5,$22    carry out of adding the low half into $1
#                       (that addq is presumably on a hidden line)
#   addq   $20,$22,$20  fold that carry into the high half
#   addq   $1,$0,$1     add the incoming carry word
#   cmpult $1,$0,$0     carry out of that second add
#   addq   $20,$0,$0    outgoing carry = high half + carry bit
#
# The digit groups in the trailing comments appear to be word/stage tags
# used while hand-scheduling; "######" marks the multiply instructions.
# ---------------------------------------------------------------------
17 .globl bn_mul_add_words
# $18 < 0 here means the four-word unrolled section is skipped.
26 blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
# ---- unrolled section: four words, multiplies interleaved with the
# ---- carry chains of the preceding words
31 mulq $20,$19,$5 # 1 2 1 ######
34 umulh $20,$19,$20 # 1 2 ######
37 mulq $21,$19,$6 # 2 2 1 ######
41 umulh $21,$19,$21 # 2 2 ######
42 cmpult $1,$5,$22 # 1 2 3 1
43 addq $20,$22,$20 # 1 3 1
44 addq $1,$0,$1 # 1 2 3 1
45 mulq $27,$19,$7 # 3 2 1 ######
46 cmpult $1,$0,$0 # 1 2 3 2
48 addq $20,$0,$0 # 1 3 2
49 cmpult $2,$6,$23 # 2 2 3 1
50 addq $21,$23,$21 # 2 3 1
51 umulh $27,$19,$27 # 3 2 ######
52 addq $2,$0,$2 # 2 2 3 1
53 cmpult $2,$0,$0 # 2 2 3 2
55 mulq $28,$19,$8 # 4 2 1 ######
56 addq $21,$0,$0 # 2 3 2
59 cmpult $3,$7,$24 # 3 2 3 1
# The stores use negative offsets from $16, so $16 has presumably been
# advanced past this group of four words on a line not shown.
60 stq $1,-32($16) # 1 2 4
61 umulh $28,$19,$28 # 4 2 ######
62 addq $27,$24,$27 # 3 3 1
63 addq $3,$0,$3 # 3 2 3 1
64 stq $2,-24($16) # 2 2 4
65 cmpult $3,$0,$0 # 3 2 3 2
66 stq $3,-16($16) # 3 2 4
68 addq $27,$0,$0 # 3 3 2
69 cmpult $4,$8,$25 # 4 2 3 1
71 addq $28,$25,$28 # 4 3 1
72 addq $4,$0,$4 # 4 2 3 1
73 cmpult $4,$0,$0 # 4 2 3 2
74 stq $4,-8($16) # 4 2 4
75 addq $28,$0,$0 # 4 3 2
# ---- tail section: one word per pass, same carry chain as above
87 mulq $20,$19,$5 # 4 2 1
91 umulh $20,$19,$20 # 4 2
93 cmpult $1,$5,$22 # 4 2 3 1
94 addq $20,$22,$20 # 4 3 1
95 addq $1,$0,$1 # 4 2 3 1
96 cmpult $1,$0,$0 # 4 2 3 2
97 addq $20,$0,$0 # 4 3 2
98 stq $1,-8($16) # 4 2 4
# Return through the address held in $26; the carry is left in $0.
100 ret $31,($26),1 # else exit
# Enter the tail section only while $18 is still positive; otherwise
# there is nothing left to process and the routine returns.
105 bgt $18,$45 # goto tail code
106 ret $31,($26),1 # else exit
108 .end bn_mul_add_words
# ---------------------------------------------------------------------
# Multiply-words routine -- excerpt.  The routine's name, entry label and
# branch-target labels ($143, $145) are not visible in this view
# (presumably this is bn_mul_words -- confirm against the full file).
#
# Register roles that can be read off the visible instructions:
#   $16              base register of every result store
#   $17              base register of the visible source loads
#   $18              count tested by blt/bgt
#   $19              multiplier: second operand of every mulq/umulh
#   $0               running carry word
#   $5-$8            low product halves; after the carry is added they
#                    are the words that get stored
#   $20,$21,$27,$28  source words, overwritten with the high halves
#
# Carry chain per word (word 1 shown):
#   addq   $5,$0,$5     low half + incoming carry
#   cmpult $5,$0,$0     carry out of that add
#   addq   $20,$0,$0    outgoing carry = high half + carry bit
# Unlike the multiply-add routine, nothing is read from the destination,
# so there is only one add and one compare per word.
# ---------------------------------------------------------------------
# $18 < 0 here means the four-word unrolled section is skipped.
119 blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
# ---- unrolled section: four words per pass
124 mulq $20,$19,$5 # 1 2 1 #####
126 ldq $27,16($17) # 3 1
127 umulh $20,$19,$20 # 1 2 #####
128 ldq $28,24($17) # 4 1
129 mulq $21,$19,$6 # 2 2 1 #####
130 addq $5,$0,$5 # 1 2 3 1
132 cmpult $5,$0,$0 # 1 2 3 2
133 umulh $21,$19,$21 # 2 2 #####
134 addq $20,$0,$0 # 1 3 2
136 addq $6,$0,$6 # 2 2 3 1
137 mulq $27,$19,$7 # 3 2 1 #####
138 cmpult $6,$0,$0 # 2 2 3 2
139 addq $21,$0,$0 # 2 3 2
141 umulh $27,$19,$27 # 3 2 #####
# Negative store offsets: $16 has presumably already been advanced past
# this group of four words on a line not shown.
142 stq $5,-32($16) # 1 2 4
143 mulq $28,$19,$8 # 4 2 1 #####
144 addq $7,$0,$7 # 3 2 3 1
145 stq $6,-24($16) # 2 2 4
146 cmpult $7,$0,$0 # 3 2 3 2
147 umulh $28,$19,$28 # 4 2 #####
148 addq $27,$0,$0 # 3 3 2
149 stq $7,-16($16) # 3 2 4
150 addq $8,$0,$8 # 4 2 3 1
151 cmpult $8,$0,$0 # 4 2 3 2
153 addq $28,$0,$0 # 4 3 2
155 stq $8,-8($16) # 4 2 4
# ---- tail section: one word per pass, same carry chain
166 mulq $20,$19,$5 # 4 2 1
168 umulh $20,$19,$20 # 4 2
169 addq $5,$0,$5 # 4 2 3 1
171 cmpult $5,$0,$0 # 4 2 3 2
173 addq $20,$0,$0 # 4 3 2
174 stq $5,-8($16) # 4 2 4
# Return through the address held in $26; the carry is left in $0.
177 ret $31,($26),1 # else exit
# Enter the tail section only while $18 is still positive.
182 bgt $18,$145 # goto tail code
183 ret $31,($26),1 # else exit
# ---------------------------------------------------------------------
# Word-squaring routine -- excerpt.  The routine's name, entry label and
# branch-target labels ($543, $442) are not visible in this view
# (presumably this is bn_sqr_words -- confirm against the full file).
#
# Each source word is multiplied by itself; the low half (mulq) and the
# high half (umulh) of the 128-bit square are stored as two consecutive
# quadwords through $16.  There is no carry chain between words.
#
# Register roles that can be read off the visible instructions:
#   $16              base register of every result store
#   $17              base register of the visible source loads
#   $18              count tested by blt/bgt
#   $20,$21,$27,$28  source words
#   $5-$8            low halves of the squares
#   $1-$4            high halves of the squares
# ---------------------------------------------------------------------
# $18 < 0 here means the four-word unrolled section is skipped.
195 blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
# ---- unrolled section: four squares per pass
199 mulq $20,$20,$5 ######
202 umulh $20,$20,$1 ######
203 ldq $27,16($17) # third source word of the group
204 mulq $21,$21,$6 ######
205 ldq $28,24($17) # fourth source word of the group
207 umulh $21,$21,$2 ######
209 mulq $27,$27,$7 ######
210 stq $6,16($16) # low half of $21*$21
211 umulh $27,$27,$3 ######
212 stq $2,24($16) # high half of $21*$21
213 mulq $28,$28,$8 ######
214 stq $7,32($16) # low half of $27*$27
215 umulh $28,$28,$4 ######
216 stq $3,40($16) # high half of $27*$27
# The last pair is stored with negative offsets, so $16 has presumably
# been advanced past the whole eight-quadword group on a hidden line.
220 stq $8,-16($16) # low half of $28*$28
221 stq $4,-8($16) # high half of $28*$28
# ---- tail section: one square per pass
228 ldq $20,0($17) # a[0]
229 mulq $20,$20,$5 # low half of a[0]*a[0]
233 umulh $20,$20,$1 # high half of a[0]*a[0]
234 stq $5,-16($16) # low half  (first quadword of the pair)
235 stq $1,-8($16) # high half (second quadword of the pair)
# Return through the address held in $26.
238 ret $31,($26),1 # else exit
# Enter the tail section only while $18 is still positive.
243 bgt $18,$442 # goto tail code
244 ret $31,($26),1 # else exit
# ---------------------------------------------------------------------
# Add-with-carry routine -- excerpt.  The routine's name, entry label,
# several loads and the branch-target label ($945) are not visible in
# this view (presumably this is bn_add_words -- confirm against the
# full file).
#
# Register roles that can be read off the visible instructions:
#   $16      result pointer (stq ..($16); advanced by 32 per pass)
#   $17      first source pointer  ("a", advanced by 32 per pass)
#   $18      second source pointer ("b", advanced by 32 per pass)
#   $19      remaining word count (decremented by 4, or by 1 in the tail)
#   $0       carry, cleared at the top
#   $1-$4    b words, overwritten with the sums that get stored
#   $5-$8    a words
#   $22-$25  carry bits out of the a+b additions
#
# Carry detection per word -- the order matters:
#   addq   $1,$5,$1     r = a + b
#   cmpult $1,$5,$22    carry1 = (r < a), taken BEFORE the carry-in is
#                       added, so it reflects a+b alone
#   addq   $1,$0,$1     r += carry-in
#   cmpult $1,$0,$0     carry2 = (r < carry-in)
# carry1 and carry2 are presumably summed into $0 on lines not shown.
# ---------------------------------------------------------------------
256 bis $31,$31,$0 # carry = 0
# ---- unrolled section: four words per pass
262 addq $1,$5,$1 # r=a+b;
264 cmpult $1,$5,$22 # did we overflow?
266 addq $1,$0,$1 # c+= overflow
267 ldq $7,16($17) # a[2]
268 cmpult $1,$0,$0 # overflow?
269 ldq $3,16($18) # b[2]
271 ldq $8,24($17) # a[3]
272 addq $2,$6,$2 # r=a+b;
273 ldq $4,24($18) # b[3]
274 cmpult $2,$6,$23 # did we overflow?
275 addq $3,$7,$3 # r=a+b;
276 addq $2,$0,$2 # c+= overflow
277 cmpult $3,$7,$24 # did we overflow?
278 cmpult $2,$0,$0 # overflow?
279 addq $4,$8,$4 # r=a+b;
281 cmpult $4,$8,$25 # did we overflow?
282 addq $3,$0,$3 # c+= overflow
283 stq $1,0($16) # r[0]=c
284 cmpult $3,$0,$0 # overflow?
285 stq $2,8($16) # r[1]=c
287 stq $3,16($16) # r[2]=c
288 addq $4,$0,$4 # c+= overflow
289 subq $19,4,$19 # loop--
290 cmpult $4,$0,$0 # overflow?
291 addq $17,32,$17 # a++
293 stq $4,24($16) # r[3]=c
294 addq $18,32,$18 # b++
295 addq $16,32,$16 # r++
# ---- tail section: one word per pass
# The a+b carry test must see $1 before the carry-in is added, exactly
# as in the unrolled section.  Testing afterwards compares a+b+c against
# a, which drops the carry for e.g. a=5, b=2^64-1, c=1 and double-counts
# it when a+b = 2^64-1 with a>0 and c=1.
305 addq $1,$5,$1 # r=a+b;
306 subq $19,1,$19 # loop--
307 cmpult $1,$5,$22 # did a+b overflow? (before carry-in is added)
309 addq $1,$0,$1 # r += carry-in
310 cmpult $1,$0,$0 # overflow?
312 stq $1,0($16) # r[0]=c
# Return through the address held in $26; the carry is left in $0.
317 ret $31,($26),1 # else exit
# Enter the tail section only while words remain ($19 > 0).
321 bgt $19,$945 # goto tail code
322 ret $31,($26),1 # else exit
326 # What follows was taken directly from the C compiler with a few
327 # hacks to redo the labels.
357 jsr $26,BN_num_bits_word
365 # lda $16,_IO_stderr_
2562 .globl bn_sqr_comba4
2702 .globl bn_sqr_comba8
2732 cmpult $22, $24, $21
2733 cmpult $23, $25, $20
2742 cmpult $23, $19, $17
2756 cmpult $23, $28, $20
2773 cmpult $22, $17, $28
2787 cmpult $22, $20, $18
2796 cmpult $22, $17, $28
2797 cmpult $23, $21, $19
2810 cmpult $22, $27, $18
2811 cmpult $23, $24, $17
2824 cmpult $22, $21, $25
2825 cmpult $23, $28, $27
2840 cmpult $23, $24, $20
2854 cmpult $23, $28, $19
2868 cmpult $23, $18, $17
2879 cmpult $22, $19, $21
2893 cmpult $22, $18, $25
2907 cmpult $22, $24, $27
2921 cmpult $22, $28, $19
2936 cmpult $22, $24, $21
2937 cmpult $23, $17, $18
2950 cmpult $22, $28, $25
2951 cmpult $23, $20, $24
2964 cmpult $22, $17, $27
2965 cmpult $23, $21, $28
2978 cmpult $22, $20, $19
2979 cmpult $23, $25, $17
2988 cmpult $23, $21, $28
3002 cmpult $23, $18, $17
3016 cmpult $23, $27, $19
3030 cmpult $23, $20, $25
3047 cmpult $22, $19, $20
3061 cmpult $22, $25, $28
3075 cmpult $22, $24, $17
3084 cmpult $22, $25, $28
3085 cmpult $23, $21, $20
3098 cmpult $22, $27, $17
3099 cmpult $23, $19, $25
3112 cmpult $22, $21, $18
3113 cmpult $23, $28, $27
3128 cmpult $23, $19, $24
3142 cmpult $23, $28, $20
3153 cmpult $22, $24, $27
3167 cmpult $22, $28, $17
3182 cmpult $22, $24, $20
3183 cmpult $23, $21, $25
3192 cmpult $23, $28, $17