summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/bn/asm/bn-586.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn/asm/bn-586.pl')
-rw-r--r--src/lib/libcrypto/bn/asm/bn-586.pl86
1 files changed, 2 insertions, 84 deletions
diff --git a/src/lib/libcrypto/bn/asm/bn-586.pl b/src/lib/libcrypto/bn/asm/bn-586.pl
index 26c2685a72..c4de4a2bee 100644
--- a/src/lib/libcrypto/bn/asm/bn-586.pl
+++ b/src/lib/libcrypto/bn/asm/bn-586.pl
@@ -5,18 +5,13 @@ require "x86asm.pl";
5 5
6&asm_init($ARGV[0],$0); 6&asm_init($ARGV[0],$0);
7 7
8$sse2=0;
9for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
10
11&external_label("OPENSSL_ia32cap_P") if ($sse2);
12
13&bn_mul_add_words("bn_mul_add_words"); 8&bn_mul_add_words("bn_mul_add_words");
14&bn_mul_words("bn_mul_words"); 9&bn_mul_words("bn_mul_words");
15&bn_sqr_words("bn_sqr_words"); 10&bn_sqr_words("bn_sqr_words");
16&bn_div_words("bn_div_words"); 11&bn_div_words("bn_div_words");
17&bn_add_words("bn_add_words"); 12&bn_add_words("bn_add_words");
18&bn_sub_words("bn_sub_words"); 13&bn_sub_words("bn_sub_words");
19&bn_sub_part_words("bn_sub_part_words"); 14#&bn_sub_part_words("bn_sub_part_words");
20 15
21&asm_finish(); 16&asm_finish();
22 17
@@ -24,7 +19,7 @@ sub bn_mul_add_words
24 { 19 {
25 local($name)=@_; 20 local($name)=@_;
26 21
27 &function_begin($name,$sse2?"EXTRN\t_OPENSSL_ia32cap_P:DWORD":""); 22 &function_begin($name,"");
28 23
29 &comment(""); 24 &comment("");
30 $Low="eax"; 25 $Low="eax";
@@ -47,83 +42,6 @@ sub bn_mul_add_words
47 42
48 &jz(&label("maw_finish")); 43 &jz(&label("maw_finish"));
49 44
50 if ($sse2) {
51 &picmeup("eax","OPENSSL_ia32cap_P");
52 &bt(&DWP(0,"eax"),26);
53 &jnc(&label("maw_loop"));
54
55 &movd("mm0",$w); # mm0 = w
56 &pxor("mm1","mm1"); # mm1 = carry_in
57
58 &set_label("maw_sse2_loop",0);
59 &movd("mm3",&DWP(0,$r,"",0)); # mm3 = r[0]
60 &paddq("mm1","mm3"); # mm1 = carry_in + r[0]
61 &movd("mm2",&DWP(0,$a,"",0)); # mm2 = a[0]
62 &pmuludq("mm2","mm0"); # mm2 = w*a[0]
63 &movd("mm4",&DWP(4,$a,"",0)); # mm4 = a[1]
64 &pmuludq("mm4","mm0"); # mm4 = w*a[1]
65 &movd("mm6",&DWP(8,$a,"",0)); # mm6 = a[2]
66 &pmuludq("mm6","mm0"); # mm6 = w*a[2]
67 &movd("mm7",&DWP(12,$a,"",0)); # mm7 = a[3]
68 &pmuludq("mm7","mm0"); # mm7 = w*a[3]
69 &paddq("mm1","mm2"); # mm1 = carry_in + r[0] + w*a[0]
70 &movd("mm3",&DWP(4,$r,"",0)); # mm3 = r[1]
71 &paddq("mm3","mm4"); # mm3 = r[1] + w*a[1]
72 &movd("mm5",&DWP(8,$r,"",0)); # mm5 = r[2]
73 &paddq("mm5","mm6"); # mm5 = r[2] + w*a[2]
74 &movd("mm4",&DWP(12,$r,"",0)); # mm4 = r[3]
75 &paddq("mm7","mm4"); # mm7 = r[3] + w*a[3]
76 &movd(&DWP(0,$r,"",0),"mm1");
77 &movd("mm2",&DWP(16,$a,"",0)); # mm2 = a[4]
78 &pmuludq("mm2","mm0"); # mm2 = w*a[4]
79 &psrlq("mm1",32); # mm1 = carry0
80 &movd("mm4",&DWP(20,$a,"",0)); # mm4 = a[5]
81 &pmuludq("mm4","mm0"); # mm4 = w*a[5]
82 &paddq("mm1","mm3"); # mm1 = carry0 + r[1] + w*a[1]
83 &movd("mm6",&DWP(24,$a,"",0)); # mm6 = a[6]
84 &pmuludq("mm6","mm0"); # mm6 = w*a[6]
85 &movd(&DWP(4,$r,"",0),"mm1");
86 &psrlq("mm1",32); # mm1 = carry1
87 &movd("mm3",&DWP(28,$a,"",0)); # mm3 = a[7]
88 &add($a,32);
89 &pmuludq("mm3","mm0"); # mm3 = w*a[7]
90 &paddq("mm1","mm5"); # mm1 = carry1 + r[2] + w*a[2]
91 &movd("mm5",&DWP(16,$r,"",0)); # mm5 = r[4]
92 &paddq("mm2","mm5"); # mm2 = r[4] + w*a[4]
93 &movd(&DWP(8,$r,"",0),"mm1");
94 &psrlq("mm1",32); # mm1 = carry2
95 &paddq("mm1","mm7"); # mm1 = carry2 + r[3] + w*a[3]
96 &movd("mm5",&DWP(20,$r,"",0)); # mm5 = r[5]
97 &paddq("mm4","mm5"); # mm4 = r[5] + w*a[5]
98 &movd(&DWP(12,$r,"",0),"mm1");
99 &psrlq("mm1",32); # mm1 = carry3
100 &paddq("mm1","mm2"); # mm1 = carry3 + r[4] + w*a[4]
101 &movd("mm5",&DWP(24,$r,"",0)); # mm5 = r[6]
102 &paddq("mm6","mm5"); # mm6 = r[6] + w*a[6]
103 &movd(&DWP(16,$r,"",0),"mm1");
104 &psrlq("mm1",32); # mm1 = carry4
105 &paddq("mm1","mm4"); # mm1 = carry4 + r[5] + w*a[5]
106 &movd("mm5",&DWP(28,$r,"",0)); # mm5 = r[7]
107 &paddq("mm3","mm5"); # mm3 = r[7] + w*a[7]
108 &movd(&DWP(20,$r,"",0),"mm1");
109 &psrlq("mm1",32); # mm1 = carry5
110 &paddq("mm1","mm6"); # mm1 = carry5 + r[6] + w*a[6]
111 &movd(&DWP(24,$r,"",0),"mm1");
112 &psrlq("mm1",32); # mm1 = carry6
113 &paddq("mm1","mm3"); # mm1 = carry6 + r[7] + w*a[7]
114 &movd(&DWP(28,$r,"",0),"mm1");
115 &add($r,32);
116 &psrlq("mm1",32); # mm1 = carry_out
117
118 &sub("ecx",8);
119 &jnz(&label("maw_sse2_loop"));
120
121 &movd($c,"mm1"); # c = carry_out
122 &emms();
123
124 &jmp(&label("maw_finish"));
125 }
126
127 &set_label("maw_loop",0); 45 &set_label("maw_loop",0);
128 46
129 &mov(&swtmp(0),"ecx"); # 47 &mov(&swtmp(0),"ecx"); #