summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/bn/asm/bn-586.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn/asm/bn-586.pl')
-rw-r--r--src/lib/libcrypto/bn/asm/bn-586.pl203
1 files changed, 151 insertions, 52 deletions
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl
index 26c2685a72..332ef3e91d 100644
--- a/src/lib/libcrypto/bn/asm/bn-586.pl
+++ b/src/lib/libcrypto/bn/asm/bn-586.pl
@@ -1,6 +1,7 @@
1#!/usr/local/bin/perl 1#!/usr/local/bin/perl
2 2
3push(@INC,"perlasm","../../perlasm"); 3$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
4push(@INC,"${dir}","${dir}../../perlasm");
4require "x86asm.pl"; 5require "x86asm.pl";
5 6
6&asm_init($ARGV[0],$0); 7&asm_init($ARGV[0],$0);
@@ -24,38 +25,25 @@ sub bn_mul_add_words
24 { 25 {
25 local($name)=@_; 26 local($name)=@_;
26 27
27 &function_begin($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); 28 &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
28 29
29 &comment(""); 30 $r="eax";
30 $Low="eax"; 31 $a="edx";
31 $High="edx"; 32 $c="ecx";
32 $a="ebx";
33 $w="ebp";
34 $r="edi";
35 $c="esi";
36
37 &xor($c,$c); # clear carry
38 &mov($r,&wparam(0)); #
39
40 &mov("ecx",&wparam(2)); #
41 &mov($a,&wparam(1)); #
42
43 &and("ecx",0xfffffff8); # num / 8
44 &mov($w,&wparam(3)); #
45
46 &push("ecx"); # Up the stack for a tmp variable
47
48 &jz(&label("maw_finish"));
49 33
50 if ($sse2) { 34 if ($sse2) {
51 &picmeup("eax","OPENSSL_ia32cap_P"); 35 &picmeup("eax","OPENSSL_ia32cap_P");
52 &bt(&DWP(0,"eax"),26); 36 &bt(&DWP(0,"eax"),26);
53 &jnc(&label("maw_loop")); 37 &jnc(&label("maw_non_sse2"));
54 38
55 &movd("mm0",$w); # mm0 = w 39 &mov($r,&wparam(0));
40 &mov($a,&wparam(1));
41 &mov($c,&wparam(2));
42 &movd("mm0",&wparam(3)); # mm0 = w
56 &pxor("mm1","mm1"); # mm1 = carry_in 43 &pxor("mm1","mm1"); # mm1 = carry_in
57 44 &jmp(&label("maw_sse2_entry"));
58 &set_label("maw_sse2_loop",0); 45
46 &set_label("maw_sse2_unrolled",16);
59 &movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0] 47 &movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0]
60 &paddq("mm1","mm3"); # mm1 = carry_in + r[0] 48 &paddq("mm1","mm3"); # mm1 = carry_in + r[0]
61 &movd("mm2",&DWP(0,$a,"",0)); # mm2 = a[0] 49 &movd("mm2",&DWP(0,$a,"",0)); # mm2 = a[0]
@@ -112,42 +100,82 @@ sub bn_mul_add_words
112 &psrlq("mm1",32); # mm1 = carry6 100 &psrlq("mm1",32); # mm1 = carry6
113 &paddq("mm1","mm3"); # mm1 = carry6 + r[7] + w*a[7] 101 &paddq("mm1","mm3"); # mm1 = carry6 + r[7] + w*a[7]
114 &movd(&DWP(28,$r,"",0),"mm1"); 102 &movd(&DWP(28,$r,"",0),"mm1");
115 &add($r,32); 103 &lea($r,&DWP(32,$r));
116 &psrlq("mm1",32); # mm1 = carry_out 104 &psrlq("mm1",32); # mm1 = carry_out
117 105
118 &sub("ecx",8); 106 &sub($c,8);
107 &jz(&label("maw_sse2_exit"));
108 &set_label("maw_sse2_entry");
109 &test($c,0xfffffff8);
110 &jnz(&label("maw_sse2_unrolled"));
111
112 &set_label("maw_sse2_loop",4);
113 &movd("mm2",&DWP(0,$a)); # mm2 = a[i]
114 &movd("mm3",&DWP(0,$r)); # mm3 = r[i]
115 &pmuludq("mm2","mm0"); # a[i] *= w
116 &lea($a,&DWP(4,$a));
117 &paddq("mm1","mm3"); # carry += r[i]
118 &paddq("mm1","mm2"); # carry += a[i]*w
119 &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low
120 &sub($c,1);
121 &psrlq("mm1",32); # carry = carry_high
122 &lea($r,&DWP(4,$r));
119 &jnz(&label("maw_sse2_loop")); 123 &jnz(&label("maw_sse2_loop"));
120 124 &set_label("maw_sse2_exit");
121 &movd($c,"mm1"); # c = carry_out 125 &movd("eax","mm1"); # c = carry_out
122 &emms(); 126 &emms();
127 &ret();
123 128
124 &jmp(&label("maw_finish")); 129 &set_label("maw_non_sse2",16);
125 } 130 }
126 131
127 &set_label("maw_loop",0); 132 # function_begin prologue
133 &push("ebp");
134 &push("ebx");
135 &push("esi");
136 &push("edi");
137
138 &comment("");
139 $Low="eax";
140 $High="edx";
141 $a="ebx";
142 $w="ebp";
143 $r="edi";
144 $c="esi";
145
146 &xor($c,$c); # clear carry
147 &mov($r,&wparam(0)); #
148
149 &mov("ecx",&wparam(2)); #
150 &mov($a,&wparam(1)); #
151
152 &and("ecx",0xfffffff8); # num / 8
153 &mov($w,&wparam(3)); #
128 154
129 &mov(&swtmp(0),"ecx"); # 155 &push("ecx"); # Up the stack for a tmp variable
156
157 &jz(&label("maw_finish"));
158
159 &set_label("maw_loop",16);
130 160
131 for ($i=0; $i<32; $i+=4) 161 for ($i=0; $i<32; $i+=4)
132 { 162 {
133 &comment("Round $i"); 163 &comment("Round $i");
134 164
135 &mov("eax",&DWP($i,$a,"",0)); # *a 165 &mov("eax",&DWP($i,$a)); # *a
136 &mul($w); # *a * w 166 &mul($w); # *a * w
137 &add("eax",$c); # L(t)+= *r 167 &add("eax",$c); # L(t)+= c
138 &mov($c,&DWP($i,$r,"",0)); # L(t)+= *r
139 &adc("edx",0); # H(t)+=carry 168 &adc("edx",0); # H(t)+=carry
140 &add("eax",$c); # L(t)+=c 169 &add("eax",&DWP($i,$r)); # L(t)+= *r
141 &adc("edx",0); # H(t)+=carry 170 &adc("edx",0); # H(t)+=carry
142 &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t); 171 &mov(&DWP($i,$r),"eax"); # *r= L(t);
143 &mov($c,"edx"); # c= H(t); 172 &mov($c,"edx"); # c= H(t);
144 } 173 }
145 174
146 &comment(""); 175 &comment("");
147 &mov("ecx",&swtmp(0)); #
148 &add($a,32);
149 &add($r,32);
150 &sub("ecx",8); 176 &sub("ecx",8);
177 &lea($a,&DWP(32,$a));
178 &lea($r,&DWP(32,$r));
151 &jnz(&label("maw_loop")); 179 &jnz(&label("maw_loop"));
152 180
153 &set_label("maw_finish",0); 181 &set_label("maw_finish",0);
@@ -160,16 +188,15 @@ sub bn_mul_add_words
160 for ($i=0; $i<7; $i++) 188 for ($i=0; $i<7; $i++)
161 { 189 {
162 &comment("Tail Round $i"); 190 &comment("Tail Round $i");
163 &mov("eax",&DWP($i*4,$a,"",0));# *a 191 &mov("eax",&DWP($i*4,$a)); # *a
164 &mul($w); # *a * w 192 &mul($w); # *a * w
165 &add("eax",$c); # L(t)+=c 193 &add("eax",$c); # L(t)+=c
166 &mov($c,&DWP($i*4,$r,"",0)); # L(t)+= *r
167 &adc("edx",0); # H(t)+=carry 194 &adc("edx",0); # H(t)+=carry
168 &add("eax",$c); 195 &add("eax",&DWP($i*4,$r)); # L(t)+= *r
169 &adc("edx",0); # H(t)+=carry 196 &adc("edx",0); # H(t)+=carry
170 &dec("ecx") if ($i != 7-1); 197 &dec("ecx") if ($i != 7-1);
171 &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t); 198 &mov(&DWP($i*4,$r),"eax"); # *r= L(t);
172 &mov($c,"edx"); # c= H(t); 199 &mov($c,"edx"); # c= H(t);
173 &jz(&label("maw_end")) if ($i != 7-1); 200 &jz(&label("maw_end")) if ($i != 7-1);
174 } 201 }
175 &set_label("maw_end",0); 202 &set_label("maw_end",0);
@@ -184,7 +211,45 @@ sub bn_mul_words
184 { 211 {
185 local($name)=@_; 212 local($name)=@_;
186 213
187 &function_begin($name,""); 214 &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
215
216 $r="eax";
217 $a="edx";
218 $c="ecx";
219
220 if ($sse2) {
221 &picmeup("eax","OPENSSL_ia32cap_P");
222 &bt(&DWP(0,"eax"),26);
223 &jnc(&label("mw_non_sse2"));
224
225 &mov($r,&wparam(0));
226 &mov($a,&wparam(1));
227 &mov($c,&wparam(2));
228 &movd("mm0",&wparam(3)); # mm0 = w
229 &pxor("mm1","mm1"); # mm1 = carry = 0
230
231 &set_label("mw_sse2_loop",16);
232 &movd("mm2",&DWP(0,$a)); # mm2 = a[i]
233 &pmuludq("mm2","mm0"); # a[i] *= w
234 &lea($a,&DWP(4,$a));
235 &paddq("mm1","mm2"); # carry += a[i]*w
236 &movd(&DWP(0,$r),"mm1"); # r[i] = carry_low
237 &sub($c,1);
238 &psrlq("mm1",32); # carry = carry_high
239 &lea($r,&DWP(4,$r));
240 &jnz(&label("mw_sse2_loop"));
241
242 &movd("eax","mm1"); # return carry
243 &emms();
244 &ret();
245 &set_label("mw_non_sse2",16);
246 }
247
248 # function_begin prologue
249 &push("ebp");
250 &push("ebx");
251 &push("esi");
252 &push("edi");
188 253
189 &comment(""); 254 &comment("");
190 $Low="eax"; 255 $Low="eax";
@@ -257,7 +322,40 @@ sub bn_sqr_words
257 { 322 {
258 local($name)=@_; 323 local($name)=@_;
259 324
260 &function_begin($name,""); 325 &function_begin_B($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":"");
326
327 $r="eax";
328 $a="edx";
329 $c="ecx";
330
331 if ($sse2) {
332 &picmeup("eax","OPENSSL_ia32cap_P");
333 &bt(&DWP(0,"eax"),26);
334 &jnc(&label("sqr_non_sse2"));
335
336 &mov($r,&wparam(0));
337 &mov($a,&wparam(1));
338 &mov($c,&wparam(2));
339
340 &set_label("sqr_sse2_loop",16);
341 &movd("mm0",&DWP(0,$a)); # mm0 = a[i]
342 &pmuludq("mm0","mm0"); # a[i] *= a[i]
343 &lea($a,&DWP(4,$a)); # a++
344 &movq(&QWP(0,$r),"mm0"); # r[i] = a[i]*a[i]
345 &sub($c,1);
346 &lea($r,&DWP(8,$r)); # r += 2
347 &jnz(&label("sqr_sse2_loop"));
348
349 &emms();
350 &ret();
351 &set_label("sqr_non_sse2",16);
352 }
353
354 # function_begin prologue
355 &push("ebp");
356 &push("ebx");
357 &push("esi");
358 &push("edi");
261 359
262 &comment(""); 360 &comment("");
263 $r="esi"; 361 $r="esi";
@@ -313,12 +411,13 @@ sub bn_div_words
313 { 411 {
314 local($name)=@_; 412 local($name)=@_;
315 413
316 &function_begin($name,""); 414 &function_begin_B($name,"");
317 &mov("edx",&wparam(0)); # 415 &mov("edx",&wparam(0)); #
318 &mov("eax",&wparam(1)); # 416 &mov("eax",&wparam(1)); #
319 &mov("ebx",&wparam(2)); # 417 &mov("ecx",&wparam(2)); #
320 &div("ebx"); 418 &div("ecx");
321 &function_end($name); 419 &ret();
420 &function_end_B($name);
322 } 421 }
323 422
324sub bn_add_words 423sub bn_add_words