#!/usr/local/bin/perl

# Generator script: emits x86 (32-bit register) assembly for the comba
# multiply and square routines bn_mul_comba8/4 and bn_sqr_comba8/4 through
# the emitter subs loaded from x86asm.pl.
#
# Command line: the LAST argument is the output file name; the first of the
# remaining arguments is handed to asm_init.

# Directory part of the script path (including the trailing separator), so
# that x86asm.pl can be found next to this script or in the perlasm directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../../perlasm");
require "x86asm.pl";

$output = pop;
# The two-argument form is kept on purpose: with it an output name of "-"
# still means "write to standard output".  A failed open used to go
# unnoticed and silently produced no assembly at all, so fail loudly.
open STDOUT,">$output" or die "can't open $output: $!";

&asm_init($ARGV[0]);

&bn_mul_comba("bn_mul_comba8",8);
&bn_mul_comba("bn_mul_comba4",4);
&bn_sqr_comba("bn_sqr_comba8",8);
&bn_sqr_comba("bn_sqr_comba4",4);

&asm_finish();

# Buffered write errors (e.g. a full disk) only surface at close time; check
# it so that a truncated assembly file is never mistaken for a good one.
close STDOUT or die "error closing STDOUT: $!";

# mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
#
# Emits one multiply-accumulate step of the comba multiplication:
# "mul edx" followed by add/adc/adc of the product into the accumulator
# registers $c0 (low), $c1 and $c2 (high).  eax and edx must already hold
# the two words to multiply; $ai and $bi are only used in the emitted comment.
#
#   $a, $b     registers used as base for the a[] and b[] loads
#   $c0..$c2   accumulator registers
#   $pos       0: more products follow for this result word; load a[$na]
#                 into eax and b[$nb] into edx
#              1: last product of this word; load wparam(0) (r) into eax,
#                 store $c0 to r[$i], then load b[$nb]/a[$na] for the next word
#             >1: last product of the last word; as 1 but without loading
#                 any further operands
#   $i         index of the result word stored when $pos > 0
#   $na, $nb   indices of the next a and b words to load
#
# The loads are interleaved with the carry chain; the order of the emitter
# calls below is the order of the generated instructions.
sub mul_add_c
    {
    local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("mul a[$ai]*b[$bi]");

    # "eax" and "edx" are always pre-loaded by the previous step.

    &mul("edx");
    &add($c0,"eax");
    &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0;   # load next a
    &mov("eax",&wparam(0)) if $pos > 0;               # load r[]
    ###
    &adc($c1,"edx");
    # Next b word is needed both while the word continues and when the
    # next word starts.
    &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0 || $pos == 1;   # load next b
    ###
    &adc($c2,0);
    # if pos > 1, it means it is the last loop
    &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0;      # save r[];
    &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;   # load next a
    }

# sqr_add_c($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
#
# Emits one accumulate step of the comba squaring in which the product is
# added once: "mul eax" when $ai == $bi, otherwise "mul edx", followed by
# add/adc/adc into the accumulator registers $c0 (low), $c1 and $c2 (high).
# eax (and edx when $ai != $bi) must already hold the operand word(s).
#
#   $r         register used as base for the r[] store
#   $a         register used as base for the a[] loads
#   $c0..$c2   accumulator registers
#   $pos       0: more products follow for this result word; load a[$na]
#                 into eax
#              1: last product of this word; store $c0 to r[$i], load a[$na]
#                 into eax and, if $na != $nb, a[$nb] into edx
#             >1: last product of the last word; only store $c0 to r[$i]
#   $i         index of the result word stored when $pos > 0
#   $na, $nb   indices of the next a words to load into eax / edx
#
# The order of the emitter calls below is the order of the generated
# instructions.
sub sqr_add_c
    {
    local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("sqr a[$ai]*a[$bi]");

    # "eax" and "edx" are always pre-loaded by the previous step.

    if ($ai == $bi)
        { &mul("eax"); }
    else
        { &mul("edx"); }
    &add($c0,"eax");
    &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0;   # load next a
    ###
    &adc($c1,"edx");
    # edx is only reloaded when the next product has two distinct operands.
    &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
    ###
    &adc($c2,0);
    # if pos > 1, it means it is the last loop
    &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0;         # save r[];
    &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;   # load next a (a[$na])
    }

# sqr_add_c2($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
#
# Emits one accumulate step of the comba squaring in which the product is
# added twice: after the multiply, "add eax,eax / adc edx,edx" doubles the
# product (the carry out of the doubling goes into $c2), and the doubled
# value is then added into the accumulator registers $c0 (low), $c1 and
# $c2 (high).  eax and edx must already hold the operand words.
#
#   $r         register used as base for the r[] store
#   $a         register used as base for the a[] loads
#   $c0..$c2   accumulator registers
#   $pos       0: more products follow for this result word
#              1: last product of this word; also store $c0 to r[$i]
#             >1: last product of the last word; store $c0 to r[$i] and
#                 load nothing further
#              For $pos 0 and 1, a[$na] is loaded into eax and, if
#              $na != $nb, a[$nb] into edx.
#   $i         index of the result word stored when $pos > 0
#   $na, $nb   indices of the next a words to load into eax / edx
#
# The order of the emitter calls below is the order of the generated
# instructions.
sub sqr_add_c2
    {
    local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("sqr a[$ai]*a[$bi]");

    # "eax" and "edx" are always pre-loaded by the previous step.

    if ($ai == $bi)
        { &mul("eax"); }
    else
        { &mul("edx"); }
    &add("eax","eax");
    ###
    &adc("edx","edx");
    ###
    &adc($c2,0);
    &add($c0,"eax");
    &adc($c1,"edx");
    # The next a word goes into eax whether this result word continues
    # (pos 0) or the next one starts (pos 1).
    &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0 || $pos == 1;   # load next a (a[$na])
    &adc($c2,0);
    &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0;                      # save r[];
    &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb);
    ###
    }

# bn_mul_comba($name,$num)
#
# Emits a complete function called $name that multiplies two $num-word
# arrays column by column: wparam(1) is loaded as a, wparam(2) as b, and
# the result words are stored through wparam(0) (r).  Result words
# 0 .. 2*$num-2 are stored by mul_add_c as each column finishes; the final
# accumulator word is stored to r[2*$num-1] after the loop.
#
# The generated code saves esi, edi, ebp and ebx on entry and restores
# them before returning.
sub bn_mul_comba
    {
    local($name,$num)=@_;
    local($a,$b,$c0,$c1,$c2);
    local($i,$as,$bs,$be,$ai,$bi);
    local($tot,$end);
    # Loop scratch; localized so they no longer leak as package globals.
    local($j,$v,$na,$nb);

    &function_begin_B($name,"");

    $c0="ebx";
    $c1="ecx";
    $c2="ebp";
    $a="esi";
    $b="edi";

    $as=0;      # index of the first a word used in the current column
    $bs=0;      # index of the first b word used in the current column
    $be=0;      # index of the last b word used in the current column
    $tot=$num+$num-1;

    &push("esi");
    &mov($a,&wparam(1));
    &push("edi");
    &mov($b,&wparam(2));
    &push("ebp");
    &push("ebx");

    &xor($c0,$c0);
    &mov("eax",&DWP(0,$a,"",0));    # load the first word of a
    &xor($c1,$c1);
    &mov("edx",&DWP(0,$b,"",0));    # load the first word of b

    for ($i=0; $i<$tot; $i++)
        {
        $ai=$as;
        $bi=$bs;
        $end=$be+1;

        &comment("################## Calculate word $i");

        for ($j=$bs; $j<$end; $j++)
            {
            &xor($c2,$c2) if ($j == $bs);
            if (($j+1) == $end)
                {
                # Last product of this column: r[$i] gets stored and
                # the operands for the next column's first product
                # are loaded.  $v == 2 marks the very last column.
                $v=1;
                $v=2 if (($i+1) == $tot);
                $na=$as+($i < ($num-1));
                $nb=$bs+($i >= ($num-1));
                }
            else
                {
                # More products in this column: step a down, b up.
                $v=0;
                $na=($ai-1);
                $nb=($bi+1);
                }
#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
            &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
            if ($v)
                {
                &comment("saved r[$i]");
                # Rotate the accumulator registers for the next column.
                ($c0,$c1,$c2)=($c1,$c2,$c0);
                }
            $ai--;
            $bi++;
            }
        $as++ if ($i < ($num-1));

        $bs++ if ($i >= ($num-1));
        $be++ if ($i < ($num-1));
        }
    # Here $i == $tot.  eax still holds wparam(0), loaded by the last
    # mul_add_c call (its $pos was > 0), so this stores the top word.
    &comment("save r[$i]");
    &mov(&DWP($i*4,"eax","",0),$c0);

    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
    }

# bn_sqr_comba($name,$num)
#
# Emits a complete function called $name that squares a $num-word array
# column by column: wparam(0) is loaded as r and wparam(1) as a.  Within a
# column only the products a[ai]*a[bi] with ai >= bi are generated: the
# ai == bi product via sqr_add_c (added once), the others via sqr_add_c2
# (added twice).  Result words 0 .. 2*$num-2 are stored by those helpers as
# each column finishes; the final accumulator word is stored to
# r[2*$num-1] after the loop.
#
# The generated code saves esi, edi, ebp and ebx on entry and restores
# them before returning.
sub bn_sqr_comba
    {
    local($name,$num)=@_;
    # Not initialized from @_: these are register names, set below.
    local($r,$a,$c0,$c1,$c2);
    local($i,$as,$bs,$be,$ai,$bi);
    local($tot,$end);
    # Loop scratch; localized so they no longer leak as package globals.
    local($j,$v,$na,$nb);

    &function_begin_B($name,"");

    $c0="ebx";
    $c1="ecx";
    $c2="ebp";
    $a="esi";
    $r="edi";

    &push("esi");
    &push("edi");
    &push("ebp");
    &push("ebx");
    &mov($r,&wparam(0));
    &mov($a,&wparam(1));
    &xor($c0,$c0);
    &xor($c1,$c1);
    &mov("eax",&DWP(0,$a,"",0));    # load the first word

    $as=0;      # index of the first (highest) a word used in the column
    $bs=0;      # index of the second (lowest) a word used in the column
    $be=0;      # upper bound for the second index in the column
    $tot=$num+$num-1;

    for ($i=0; $i<$tot; $i++)
        {
        $ai=$as;
        $bi=$bs;
        $end=$be+1;

        &comment("############### Calculate word $i");
        for ($j=$bs; $j<$end; $j++)
            {
            &xor($c2,$c2) if ($j == $bs);
            if (($ai-1) < ($bi+1))
                {
                # The next index pair would cross over, so this is
                # the last product of the column: r[$i] gets stored
                # and the operands for the next column are loaded.
                # $v == 2 marks the very last column.
                $v=1;
                $v=2 if ($i+1) == $tot;
                $na=$as+($i < ($num-1));
                $nb=$bs+($i >= ($num-1));
                }
            else
                {
                # More products in this column: step ai down, bi up.
                $v=0;
                $na=$ai-1;
                $nb=$bi+1;
                }
            if ($ai == $bi)
                {
                &sqr_add_c($r,$a,$ai,$bi,
                    $c0,$c1,$c2,$v,$i,$na,$nb);
                }
            else
                {
                &sqr_add_c2($r,$a,$ai,$bi,
                    $c0,$c1,$c2,$v,$i,$na,$nb);
                }
            if ($v)
                {
                &comment("saved r[$i]");
                # Rotate the accumulator registers for the next column
                # and skip the remaining index pairs of this column.
                ($c0,$c1,$c2)=($c1,$c2,$c0);
                last;
                }
            $ai--;
            $bi++;
            }
        $as++ if ($i < ($num-1));

        $bs++ if ($i >= ($num-1));
        $be++ if ($i < ($num-1));
        }
    # Here $i == $tot: store the top result word.
    &mov(&DWP($i*4,$r,"",0),$c0);
    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
    }