PPC assembly pack: remove branch hints.
[oweals/openssl.git] / crypto / bn / asm / co-586.pl
#!/usr/local/bin/perl
#
# Generator for the x86 comba multiplication/squaring routines
# (bn_mul_comba8, bn_mul_comba4, bn_sqr_comba8, bn_sqr_comba4).
#
# The last command-line argument names the output file; the first of the
# remaining arguments is handed to asm_init() (presumably the assembler
# flavour understood by x86asm.pl -- confirm against that module).

# Directory part of the script path (with its trailing separator), or the
# empty string when the script was invoked without one.  Testing the match
# avoids picking up a stale or undefined $1.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";

# Redirect everything the perlasm layer prints to the requested file.
# When no output argument was given STDOUT is left alone instead of being
# closed by a failed reopen, and a failing open is fatal rather than
# silently discarding the generated code.
$output = pop;
if (defined($output))
        {
        open(STDOUT,">",$output) || die "can't open $output: $!";
        }

&asm_init($ARGV[0],$0);

&bn_mul_comba("bn_mul_comba8",8);
&bn_mul_comba("bn_mul_comba4",4);
&bn_sqr_comba("bn_sqr_comba8",8);
&bn_sqr_comba("bn_sqr_comba4",4);

&asm_finish();

# Buffered write errors (disk full, ...) only surface at close time.
close(STDOUT) || die "error closing STDOUT: $!";
20
# mul_add_c - emit one multiply-accumulate step of the comba multiplication.
#
# The emitted code multiplies eax by edx (the caller arranges for them to
# hold a[$ai] and b[$bi]) and adds the edx:eax product into the accumulator
# registers $c0 (low), $c1 and $c2 (carry out).  Loads for the following
# step are interleaved with the carry chain and are selected by $pos:
#
#   $pos == 0   more products follow for result word $i:
#               load a[$na] into eax and b[$nb] into edx.
#   $pos == 1   last product of result word $i: fetch r (wparam(0)) into
#               eax, store $c0 to r[$i], then load b[$nb] into edx and
#               a[$na] into eax for the next word.
#   $pos >  1   last product of the routine: fetch r into eax and store
#               $c0 to r[$i]; eax is left holding r.
#
# $a and $b (first and third argument) are the registers holding the
# operand pointers.
sub mul_add_c
        {
        my($ap,$ai,$bp,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

        my $more=($pos == 0);           # further products for this word
        my $store=($pos > 0);           # this product completes r[$i]
        my $next_word=($pos == 1);      # ...and another word follows

        &comment("mul a[$ai]*b[$bi]");

        # eax and edx are always pre-loaded by the previous step.
        &mul("edx");
        &add($c0,"eax");
        if ($more)
                {
                &mov("eax",&DWP(($na)*4,$ap,"",0));     # load next a
                }
        if ($store)
                {
                &mov("eax",&wparam(0));                 # load r[]
                }
        &adc($c1,"edx");
        if ($more || $next_word)
                {
                &mov("edx",&DWP(($nb)*4,$bp,"",0));     # load next b
                }
        &adc($c2,0);
        if ($store)
                {
                &mov(&DWP($i*4,"eax","",0),$c0);        # save r[$i]
                }
        if ($next_word)
                {
                &mov("eax",&DWP(($na)*4,$ap,"",0));     # load next a
                }
        }
48
# sqr_add_c - emit one multiply-accumulate step of the comba squaring.
#
# The emitted code multiplies eax by itself when $ai == $bi, otherwise eax
# by edx, and adds the edx:eax product once into the accumulator registers
# $c0 (low), $c1 and $c2 (carry out).  $r and $a are the registers holding
# the result and operand pointers.  $pos selects the interleaved
# loads/stores:
#
#   $pos == 0   load a[$na] into eax for the next product.
#   $pos == 1   store $c0 to r[$i], load a[$nb] into edx (only when
#               $na != $nb) and a[$na] into eax for the next word.
#   $pos >  1   store $c0 to r[$i]; nothing further is loaded.
sub sqr_add_c
        {
        my($rp,$ap,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

        &comment("sqr a[$ai]*a[$bi]");

        # eax (and edx when needed) are always pre-loaded.
        &mul(($ai == $bi) ? "eax" : "edx");
        &add($c0,"eax");
        if ($pos == 0)
                {
                &mov("eax",&DWP(($na)*4,$ap,"",0));     # load next a
                }
        &adc($c1,"edx");
        if (($pos == 1) && ($na != $nb))
                {
                &mov("edx",&DWP(($nb)*4,$ap,"",0));     # second factor
                }
        &adc($c2,0);
        if ($pos > 0)
                {
                &mov(&DWP($i*4,$rp,"",0),$c0);          # save r[$i]
                }
        if ($pos == 1)
                {
                &mov("eax",&DWP(($na)*4,$ap,"",0));     # first factor
                }
        }
77
# sqr_add_c2 - emit one doubled multiply-accumulate step of the squaring.
#
# Same interface as sqr_add_c, but the edx:eax product is doubled before it
# is added to the accumulator registers $c0/$c1/$c2; the bit shifted out by
# the doubling and the carry of the addition are both added into $c2.
# $pos selects the interleaved loads/stores:
#
#   $pos == 0   load a[$na] into eax and, when $na != $nb, a[$nb] into edx.
#   $pos == 1   as for 0, and additionally store $c0 to r[$i].
#   $pos >  1   store $c0 to r[$i]; nothing further is loaded.
sub sqr_add_c2
        {
        my($rp,$ap,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

        my $reload=(($pos == 0) || ($pos == 1));

        &comment("sqr a[$ai]*a[$bi]");

        # eax and edx are always pre-loaded.
        &mul(($ai == $bi) ? "eax" : "edx");

        # edx:eax *= 2, overflow bit goes to the top accumulator word
        &add("eax","eax");
        &adc("edx","edx");
        &adc($c2,0);

        # accumulate the doubled product
        &add($c0,"eax");
        &adc($c1,"edx");
        if ($reload)
                {
                &mov("eax",&DWP(($na)*4,$ap,"",0));     # first factor
                }
        &adc($c2,0);
        if ($pos > 0)
                {
                &mov(&DWP($i*4,$rp,"",0),$c0);          # save r[$i]
                }
        if (($pos <= 1) && ($na != $nb))
                {
                &mov("edx",&DWP(($nb)*4,$ap,"",0));     # second factor
                }
        }
109
# bn_mul_comba - emit a fully unrolled comba multiplication routine.
#
#   $name   symbol name of the generated function
#   $num    number of words in each operand (words are addressed with a
#           scale of 4 bytes)
#
# The generated function reads its stack arguments as wparam(0) = r,
# wparam(1) = a and wparam(2) = b and stores 2*$num words to r.  Result
# words are produced one column at a time: column $i sums the products
# a[$ai]*b[$bi] with $ai+$bi == $i, $ai running downwards from $as and $bi
# upwards from $bs.  Three registers form a rotating accumulator: after a
# column is finished its low word has been stored and the remaining two
# words become the low and middle word of the next column.
#
# All working variables are lexical, so nothing leaks into (or is picked
# up from) package globals; in particular Perl's special $a/$b are no
# longer touched.
sub bn_mul_comba
        {
        my($name,$num)=@_;
        my($i,$j,$ai,$bi,$na,$nb,$end,$v);

        # rotating accumulator (low, middle, high) and operand pointers
        my($c0,$c1,$c2)=("ebx","ecx","ebp");
        my $ap="esi";
        my $bp="edi";

        my $as=0;                       # first a index of the column
        my $bs=0;                       # first b index of the column
        my $be=0;                       # last b index of the column
        my $tot=$num+$num-1;            # number of columns

        &function_begin_B($name,"");

        # The pushes and argument loads are interleaved; wparam() is
        # evaluated at generation time, so this order must be kept.
        &push("esi");
         &mov($ap,&wparam(1));
        &push("edi");
         &mov($bp,&wparam(2));
        &push("ebp");
         &push("ebx");

        &xor($c0,$c0);
         &mov("eax",&DWP(0,$ap,"",0));  # load the first word of a
        &xor($c1,$c1);
         &mov("edx",&DWP(0,$bp,"",0));  # load the first word of b

        for ($i=0; $i<$tot; $i++)
                {
                $ai=$as;
                $bi=$bs;
                $end=$be+1;

                &comment("################## Calculate word $i"); 

                for ($j=$bs; $j<$end; $j++)
                        {
                        &xor($c2,$c2) if ($j == $bs);
                        if (($j+1) == $end)
                                {
                                # Last product of this column: store the
                                # word (2 == also last column) and preload
                                # the operands that start the next column.
                                $v=(($i+1) == $tot) ? 2 : 1;
                                $na=$as+(($i < ($num-1)) ? 1 : 0);
                                $nb=$bs+(($i >= ($num-1)) ? 1 : 0);
                                }
                        else
                                {
                                # More products follow in this column.
                                $v=0;
                                $na=$ai-1;
                                $nb=$bi+1;
                                }
#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
                        &mul_add_c($ap,$ai,$bp,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
                        if ($v)
                                {
                                # mul_add_c has emitted the store of r[$i]
                                &comment("saved r[$i]");
                                ($c0,$c1,$c2)=($c1,$c2,$c0);
                                }
                        $ai--;
                        $bi++;
                        }

                # Columns grow until the middle of the result and shrink
                # afterwards.
                if ($i < ($num-1))
                        {
                        $as++;
                        $be++;
                        }
                else
                        {
                        $bs++;
                        }
                }

        # The final mul_add_c left r in eax; $i now indexes the top word.
        &comment("save r[$i]");
        &mov(&DWP($i*4,"eax","",0),$c0);

        &pop("ebx");
        &pop("ebp");
        &pop("edi");
        &pop("esi");
        &ret();
        &function_end_B($name);
        }
200
# bn_sqr_comba - emit a fully unrolled comba squaring routine.
#
#   $name   symbol name of the generated function
#   $num    number of words in the operand (words are addressed with a
#           scale of 4 bytes)
#
# The generated function reads its stack arguments as wparam(0) = r and
# wparam(1) = a and stores 2*$num words to r.  Column $i sums the products
# a[$ai]*a[$bi] with $ai+$bi == $i.  Only the pairs with $ai >= $bi are
# emitted: pairs with $ai != $bi go through sqr_add_c2, which adds the
# product twice, and the pair with $ai == $bi goes through sqr_add_c.
# Three registers form a rotating accumulator, as in bn_mul_comba.
#
# The register names are assigned explicitly (they used to be initialised
# from @_ by mistake) and all working variables are lexical, so nothing
# leaks into package globals.
sub bn_sqr_comba
        {
        my($name,$num)=@_;
        my($i,$j,$ai,$bi,$na,$nb,$end,$v);

        # rotating accumulator (low, middle, high) and pointers
        my($c0,$c1,$c2)=("ebx","ecx","ebp");
        my $ap="esi";
        my $rp="edi";

        my $as=0;                       # first "a" index of the column
        my $bs=0;                       # first "b" index of the column
        my $be=0;                       # last "b" index of the column
        my $tot=$num+$num-1;            # number of columns

        &function_begin_B($name,"");

        &push("esi");
         &push("edi");
        &push("ebp");
         &push("ebx");
        &mov($rp,&wparam(0));
         &mov($ap,&wparam(1));
        &xor($c0,$c0);
         &xor($c1,$c1);
        &mov("eax",&DWP(0,$ap,"",0)); # load the first word

        for ($i=0; $i<$tot; $i++)
                {
                $ai=$as;
                $bi=$bs;
                $end=$be+1;

                &comment("############### Calculate word $i");
                for ($j=$bs; $j<$end; $j++)
                        {
                        &xor($c2,$c2) if ($j == $bs);
                        if (($ai-1) < ($bi+1))
                                {
                                # The next pair would cross over, so this
                                # is the last product of the column: store
                                # the word (2 == also last column) and
                                # preload the start of the next column.
                                $v=(($i+1) == $tot) ? 2 : 1;
                                $na=$as+(($i < ($num-1)) ? 1 : 0);
                                $nb=$bs+(($i >= ($num-1)) ? 1 : 0);
                                }
                        else
                                {
                                # More products follow in this column.
                                $v=0;
                                $na=$ai-1;
                                $nb=$bi+1;
                                }
                        if ($ai == $bi)
                                {
                                &sqr_add_c($rp,$ap,$ai,$bi,
                                        $c0,$c1,$c2,$v,$i,$na,$nb);
                                }
                        else
                                {
                                &sqr_add_c2($rp,$ap,$ai,$bi,
                                        $c0,$c1,$c2,$v,$i,$na,$nb);
                                }
                        if ($v)
                                {
                                # the helper has emitted the store of r[$i]
                                &comment("saved r[$i]");
                                ($c0,$c1,$c2)=($c1,$c2,$c0);
                                last;
                                }
                        $ai--;
                        $bi++;
                        }

                # Columns grow until the middle of the result and shrink
                # afterwards.
                if ($i < ($num-1))
                        {
                        $as++;
                        $be++;
                        }
                else
                        {
                        $bs++;
                        }
                }

        # $i now indexes the top result word, still held in the accumulator.
        &mov(&DWP($i*4,$rp,"",0),$c0);
        &pop("ebx");
        &pop("ebp");
        &pop("edi");
        &pop("esi");
        &ret();
        &function_end_B($name);
        }