# Driver section: work out the directory this script lives in, add it to the
# module search path, redirect STDOUT and emit the four comba routines.
#
# The match captures everything up to and including the last '/' or '\' in
# $0; that prefix is stored in $dir.
3 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
# Modules are looked up in the script's directory and in ${dir}../../perlasm.
4 push(@INC,"${dir}","${dir}../../perlasm");
# NOTE(review): two-argument open whose result is not checked; $output is
# assigned outside this excerpt -- confirm it is always set.
8 open STDOUT,">$output";
# asm_init receives the first command-line argument and the script name.
10 &asm_init($ARGV[0],$0);
# 8-word and 4-word multiply, then 8-word and 4-word square.
12 &bn_mul_comba("bn_mul_comba8",8);
13 &bn_mul_comba("bn_mul_comba4",4);
14 &bn_sqr_comba("bn_sqr_comba8",8);
15 &bn_sqr_comba("bn_sqr_comba4",4);
# Body of the multiply-step emitter (presumably mul_add_c: its argument list
# matches the &mul_add_c(...) call made by the multiply generator; the sub
# header is outside this excerpt).
# Arguments, in order:
#   $a, $b       - base operands used to address a[] and b[]
#   $ai, $bi     - indices of the words being multiplied (used in the comment)
#   $c0,$c1,$c2  - accumulator operands; $c0 is the one stored to r[$i]
#   $pos         - selects the extra loads/stores emitted, see below
#   $i           - index of the result word being stored
#   $na, $nb     - indices of the next a[] / b[] words to pre-load
23 local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
25 # pos == -1 if eax and edx are pre-loaded, 0 to load the next
26 # words, and 1 to also load the return value pointer.
# In the statements visible here:
#   pos == 0 : load a[$na] into eax and b[$nb] into edx
#   pos == 1 : load r into eax, load b[$nb], store $c0 to r[$i], load a[$na]
#   pos >  1 : load r into eax and store $c0 to r[$i] only
28 &comment("mul a[$ai]*b[$bi]");
30 # "eax" and "edx" will always be pre-loaded.
31 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
32 # &mov("edx",&DWP($bi*4,$b,"",0));
36 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
37 &mov("eax",&wparam(0)) if $pos > 0; # load r[]
40 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # load next b
41 &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # load next b
44 # if pos > 1, it means it is the last loop
45 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[]; eax holds wparam(0) here
46 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a
# Body of a squaring-step emitter (presumably sqr_add_c; the sub header is
# outside this excerpt).
# Arguments, in order:
#   $r           - base operand used to address r[] for the store
#   $a           - base operand used to address a[]
#   $ai, $bi     - indices of the words being multiplied (used in the comment)
#   $c0,$c1,$c2  - accumulator operands; $c0 is the one stored to r[$i]
#   $pos         - selects the extra loads/stores emitted, see below
#   $i           - index of the result word being stored
#   $na, $nb     - indices of the next a[] words to pre-load
51 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
53 # pos == -1 if eax and edx are pre-loaded, 0 to load the next
54 # words, and 1 to also save the result word.
# In the statements visible here:
#   pos == 0 : load a[$na] into eax
#   pos == 1 : load a[$nb] into edx (only when $na != $nb), store $c0 to
#              r[$i], then load a[$na] into eax
#   pos >  1 : store $c0 to r[$i] only
56 &comment("sqr a[$ai]*a[$bi]");
58 # "eax" and "edx" will always be pre-loaded.
59 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
60 # &mov("edx",&DWP($bi*4,$b,"",0));
67 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
70 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
73 # if pos > 1, it means it is the last loop
74 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
75 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a (a[$na])
# Body of a second squaring-step emitter with the same argument list
# (presumably sqr_add_c2; the sub header is outside this excerpt).
# Arguments, in order:
#   $r           - base operand used to address r[] for the store
#   $a           - base operand used to address a[]
#   $ai, $bi     - indices of the words being multiplied (used in the comment)
#   $c0,$c1,$c2  - accumulator operands; $c0 is the one stored to r[$i]
#   $pos         - selects the extra loads/stores emitted, see below
#   $i           - index of the result word being stored
#   $na, $nb     - indices of the next a[] words to pre-load
80 local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
82 # pos == -1 if eax and edx are pre-loaded, 0 to load the next
83 # words, and 1 to also save the result word.
# In the statements visible here:
#   pos == 0 or 1 : load a[$na] into eax
#   pos >  0      : store $c0 to r[$i]
#   pos <= 1      : load a[$nb] into edx, but only when $na != $nb
85 &comment("sqr a[$ai]*a[$bi]");
87 # "eax" and "edx" will always be pre-loaded.
88 # &mov("eax",&DWP($ai*4,$a,"",0)) ;
89 # &mov("edx",&DWP($bi*4,$a,"",0));
102 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
103 &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next a (a[$na])
105 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
106 &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb);
# Body of the multiply generator called above as &bn_mul_comba(name, num)
# (the sub header is outside this excerpt).
#   $name - name passed to function_begin_B / function_end_B
#   $num  - word count; controls the window bookkeeping below
# NOTE(review): $tot, $end, $j, $v, $na and $nb are not in the local() lists
# visible here -- confirm whether they are meant to be package globals.
112 local($name,$num)=@_;
113 local($a,$b,$c0,$c1,$c2);
114 local($i,$as,$ae,$bs,$be,$ai,$bi);
117 &function_begin_B($name,"");
139 &mov("eax",&DWP(0,$a,"",0)); # load the first word of a
141 &mov("edx",&DWP(0,$b,"",0)); # load the first word of b
# One outer iteration per result word; $tot is set outside this excerpt.
143 for ($i=0; $i<$tot; $i++)
149 &comment("################## Calculate word $i");
151 for ($j=$bs; $j<$end; $j++)
# Clear $c2 on the first product of each result word.
153 &xor($c2,$c2) if ($j == $bs);
# $v is passed as the $pos argument; 2 marks the last result word.
157 $v=2 if (($i+1) == $tot);
# Indices of the next words to pre-load: each comparison contributes 1 when
# true and 0 when false.
168 $na=$as+($i < ($num-1));
169 $nb=$bs+($i >= ($num-1));
171 #printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
172 &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
175 &comment("saved r[$i]");
176 # &mov("eax",&wparam(0));
177 # &mov(&DWP($i*4,"eax","",0),$c0);
# Rotate the accumulator roles: old $c1 becomes $c0, old $c2 becomes $c1 and
# old $c0 becomes $c2.
178 ($c0,$c1,$c2)=($c1,$c2,$c0);
# Window bookkeeping: while $i < $num-1 advance $as and $be; afterwards
# advance $ae and $bs.
183 $as++ if ($i < ($num-1));
184 $ae++ if ($i >= ($num-1));
186 $bs++ if ($i >= ($num-1));
187 $be++ if ($i < ($num-1));
189 &comment("save r[$i]");
190 # &mov("eax",&wparam(0));
# Stores through eax; presumably eax still holds the r pointer loaded by the
# last multiply step -- TODO confirm.
191 &mov(&DWP($i*4,"eax","",0),$c0);
198 &function_end_B($name);
# Body of the squaring generator called above as &bn_sqr_comba(name, num)
# (the sub header is outside this excerpt).
#   $name - name passed to function_begin_B / function_end_B
#   $num  - word count; controls the window bookkeeping below
203 local($name,$num)=@_;
# NOTE(review): @_ holds (name, num), so this list assignment also copies them
# into $r and $a and leaves $c0..$c2 undefined; presumably all five are
# assigned their real operands further down, outside this excerpt -- confirm.
204 local($r,$a,$c0,$c1,$c2)=@_;
205 local($i,$as,$ae,$bs,$be,$ai,$bi);
206 local($b,$tot,$end,$half);
208 &function_begin_B($name,"");
224 &mov("eax",&DWP(0,$a,"",0)); # load the first word
# One outer iteration per result word; $tot is set outside this excerpt.
232 for ($i=0; $i<$tot; $i++)
238 &comment("############### Calculate word $i");
239 for ($j=$bs; $j<$end; $j++)
# Clear $c2 on the first product of each result word.
241 &xor($c2,$c2) if ($j == $bs);
# The body of this branch is not visible in this excerpt.
242 if (($ai-1) < ($bi+1))
# $v is passed as the $pos argument; 2 marks the last result word.
245 $v=2 if ($i+1) == $tot;
# Indices of the next words to pre-load: each comparison contributes 1 when
# true and 0 when false.
256 $na=$as+($i < ($num-1));
257 $nb=$bs+($i >= ($num-1));
# The condition choosing between the two emitters below is not visible in
# this excerpt.
261 &sqr_add_c($r,$a,$ai,$bi,
262 $c0,$c1,$c2,$v,$i,$na,$nb);
266 &sqr_add_c2($r,$a,$ai,$bi,
267 $c0,$c1,$c2,$v,$i,$na,$nb);
271 &comment("saved r[$i]");
272 #&mov(&DWP($i*4,$r,"",0),$c0);
# Rotate the accumulator roles: old $c1 becomes $c0, old $c2 becomes $c1 and
# old $c0 becomes $c2.
273 ($c0,$c1,$c2)=($c1,$c2,$c0);
# Window bookkeeping: while $i < $num-1 advance $as and $be; afterwards
# advance $ae and $bs.
279 $as++ if ($i < ($num-1));
280 $ae++ if ($i >= ($num-1));
282 $bs++ if ($i >= ($num-1));
283 $be++ if ($i < ($num-1));
# Store $c0 to r[$i] through $r.
285 &mov(&DWP($i*4,$r,"",0),$c0);
291 &function_end_B($name);