diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/lib/libcrypto/bn/bn_mul.c | 387 |
1 files changed, 152 insertions, 235 deletions
diff --git a/src/lib/libcrypto/bn/bn_mul.c b/src/lib/libcrypto/bn/bn_mul.c index 38c01dad18..965c1ad036 100644 --- a/src/lib/libcrypto/bn/bn_mul.c +++ b/src/lib/libcrypto/bn/bn_mul.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_mul.c,v 1.31 2023/02/13 04:25:37 jsing Exp $ */ | 1 | /* $OpenBSD: bn_mul.c,v 1.32 2023/02/14 18:37:15 jsing Exp $ */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. | 3 | * All rights reserved. |
4 | * | 4 | * |
@@ -63,293 +63,210 @@ | |||
63 | #include <openssl/opensslconf.h> | 63 | #include <openssl/opensslconf.h> |
64 | 64 | ||
65 | #include "bn_arch.h" | 65 | #include "bn_arch.h" |
66 | #include "bn_internal.h" | ||
66 | #include "bn_local.h" | 67 | #include "bn_local.h" |
67 | 68 | ||
69 | /* | ||
70 | * bn_mul_add_words() computes (carry:r[i]) = a[i] * w + r[i] + carry, where | ||
71 | * a is an array of words and w is a single word. This should really be called | ||
72 | * bn_mulw_add_words() since only one input is an array. This is used as a step | ||
73 | * in the multiplication of word arrays. | ||
74 | */ | ||
68 | #ifndef HAVE_BN_MUL_ADD_WORDS | 75 | #ifndef HAVE_BN_MUL_ADD_WORDS |
69 | #if defined(BN_LLONG) || defined(BN_UMULT_HIGH) | ||
70 | |||
71 | BN_ULONG | 76 | BN_ULONG |
72 | bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) | 77 | bn_mul_add_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w) |
73 | { | 78 | { |
74 | BN_ULONG c1 = 0; | 79 | BN_ULONG carry = 0; |
75 | 80 | ||
76 | assert(num >= 0); | 81 | assert(num >= 0); |
77 | if (num <= 0) | 82 | if (num <= 0) |
78 | return (c1); | 83 | return 0; |
79 | 84 | ||
80 | #ifndef OPENSSL_SMALL_FOOTPRINT | 85 | #ifndef OPENSSL_SMALL_FOOTPRINT |
81 | while (num & ~3) { | 86 | while (num & ~3) { |
82 | mul_add(rp[0], ap[0], w, c1); | 87 | bn_mulw_addw_addw(a[0], w, r[0], carry, &carry, &r[0]); |
83 | mul_add(rp[1], ap[1], w, c1); | 88 | bn_mulw_addw_addw(a[1], w, r[1], carry, &carry, &r[1]); |
84 | mul_add(rp[2], ap[2], w, c1); | 89 | bn_mulw_addw_addw(a[2], w, r[2], carry, &carry, &r[2]); |
85 | mul_add(rp[3], ap[3], w, c1); | 90 | bn_mulw_addw_addw(a[3], w, r[3], carry, &carry, &r[3]); |
86 | ap += 4; | 91 | a += 4; |
87 | rp += 4; | 92 | r += 4; |
88 | num -= 4; | 93 | num -= 4; |
89 | } | 94 | } |
90 | #endif | 95 | #endif |
91 | while (num) { | 96 | while (num) { |
92 | mul_add(rp[0], ap[0], w, c1); | 97 | bn_mulw_addw_addw(a[0], w, r[0], carry, &carry, &r[0]); |
93 | ap++; | 98 | a++; |
94 | rp++; | 99 | r++; |
95 | num--; | 100 | num--; |
96 | } | 101 | } |
97 | 102 | ||
98 | return (c1); | 103 | return carry; |
99 | } | 104 | } |
105 | #endif | ||
100 | 106 | ||
101 | #else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ | 107 | /* |
102 | 108 | * bn_mul_comba4() computes r[] = a[] * b[] using Comba multiplication | |
103 | BN_ULONG | 109 | * (https://everything2.com/title/Comba+multiplication), where a and b are both |
104 | bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) | 110 | * four word arrays, producing an eight word array result. |
111 | */ | ||
112 | #ifndef HAVE_BN_MUL_COMBA4 | ||
113 | void | ||
114 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | ||
105 | { | 115 | { |
106 | BN_ULONG c = 0; | 116 | BN_ULONG c0, c1, c2; |
107 | BN_ULONG bl, bh; | ||
108 | 117 | ||
109 | assert(num >= 0); | 118 | bn_mulw_addtw(a[0], b[0], 0, 0, 0, &c2, &c1, &r[0]); |
110 | if (num <= 0) | ||
111 | return ((BN_ULONG)0); | ||
112 | 119 | ||
113 | bl = LBITS(w); | 120 | bn_mulw_addtw(a[0], b[1], 0, c2, c1, &c2, &c1, &c0); |
114 | bh = HBITS(w); | 121 | bn_mulw_addtw(a[1], b[0], c2, c1, c0, &c2, &c1, &r[1]); |
115 | 122 | ||
116 | #ifndef OPENSSL_SMALL_FOOTPRINT | 123 | bn_mulw_addtw(a[2], b[0], 0, c2, c1, &c2, &c1, &c0); |
117 | while (num & ~3) { | 124 | bn_mulw_addtw(a[1], b[1], c2, c1, c0, &c2, &c1, &c0); |
118 | mul_add(rp[0], ap[0], bl, bh, c); | 125 | bn_mulw_addtw(a[0], b[2], c2, c1, c0, &c2, &c1, &r[2]); |
119 | mul_add(rp[1], ap[1], bl, bh, c); | ||
120 | mul_add(rp[2], ap[2], bl, bh, c); | ||
121 | mul_add(rp[3], ap[3], bl, bh, c); | ||
122 | ap += 4; | ||
123 | rp += 4; | ||
124 | num -= 4; | ||
125 | } | ||
126 | #endif | ||
127 | while (num) { | ||
128 | mul_add(rp[0], ap[0], bl, bh, c); | ||
129 | ap++; | ||
130 | rp++; | ||
131 | num--; | ||
132 | } | ||
133 | return (c); | ||
134 | } | ||
135 | 126 | ||
136 | #endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ | 127 | bn_mulw_addtw(a[0], b[3], 0, c2, c1, &c2, &c1, &c0); |
137 | #endif | 128 | bn_mulw_addtw(a[1], b[2], c2, c1, c0, &c2, &c1, &c0); |
129 | bn_mulw_addtw(a[2], b[1], c2, c1, c0, &c2, &c1, &c0); | ||
130 | bn_mulw_addtw(a[3], b[0], c2, c1, c0, &c2, &c1, &r[3]); | ||
138 | 131 | ||
139 | #ifndef HAVE_BN_MUL_COMBA4 | 132 | bn_mulw_addtw(a[3], b[1], 0, c2, c1, &c2, &c1, &c0); |
140 | void | 133 | bn_mulw_addtw(a[2], b[2], c2, c1, c0, &c2, &c1, &c0); |
141 | bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 134 | bn_mulw_addtw(a[1], b[3], c2, c1, c0, &c2, &c1, &r[4]); |
142 | { | 135 | |
143 | BN_ULONG c1, c2, c3; | 136 | bn_mulw_addtw(a[2], b[3], 0, c2, c1, &c2, &c1, &c0); |
144 | 137 | bn_mulw_addtw(a[3], b[2], c2, c1, c0, &c2, &c1, &r[5]); | |
145 | c1 = 0; | 138 | |
146 | c2 = 0; | 139 | bn_mulw_addtw(a[3], b[3], 0, c2, c1, &c2, &r[7], &r[6]); |
147 | c3 = 0; | ||
148 | mul_add_c(a[0], b[0], c1, c2, c3); | ||
149 | r[0] = c1; | ||
150 | c1 = 0; | ||
151 | mul_add_c(a[0], b[1], c2, c3, c1); | ||
152 | mul_add_c(a[1], b[0], c2, c3, c1); | ||
153 | r[1] = c2; | ||
154 | c2 = 0; | ||
155 | mul_add_c(a[2], b[0], c3, c1, c2); | ||
156 | mul_add_c(a[1], b[1], c3, c1, c2); | ||
157 | mul_add_c(a[0], b[2], c3, c1, c2); | ||
158 | r[2] = c3; | ||
159 | c3 = 0; | ||
160 | mul_add_c(a[0], b[3], c1, c2, c3); | ||
161 | mul_add_c(a[1], b[2], c1, c2, c3); | ||
162 | mul_add_c(a[2], b[1], c1, c2, c3); | ||
163 | mul_add_c(a[3], b[0], c1, c2, c3); | ||
164 | r[3] = c1; | ||
165 | c1 = 0; | ||
166 | mul_add_c(a[3], b[1], c2, c3, c1); | ||
167 | mul_add_c(a[2], b[2], c2, c3, c1); | ||
168 | mul_add_c(a[1], b[3], c2, c3, c1); | ||
169 | r[4] = c2; | ||
170 | c2 = 0; | ||
171 | mul_add_c(a[2], b[3], c3, c1, c2); | ||
172 | mul_add_c(a[3], b[2], c3, c1, c2); | ||
173 | r[5] = c3; | ||
174 | c3 = 0; | ||
175 | mul_add_c(a[3], b[3], c1, c2, c3); | ||
176 | r[6] = c1; | ||
177 | r[7] = c2; | ||
178 | } | 140 | } |
179 | #endif | 141 | #endif |
180 | 142 | ||
143 | /* | ||
144 | * bn_mul_comba8() computes r[] = a[] * b[] using Comba multiplication | ||
145 | * (https://everything2.com/title/Comba+multiplication), where a and b are both | ||
146 | * eight word arrays, producing a 16 word array result. | ||
147 | */ | ||
181 | #ifndef HAVE_BN_MUL_COMBA8 | 148 | #ifndef HAVE_BN_MUL_COMBA8 |
182 | void | 149 | void |
183 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) | 150 | bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) |
184 | { | 151 | { |
185 | BN_ULONG c1, c2, c3; | 152 | BN_ULONG c0, c1, c2; |
186 | 153 | ||
187 | c1 = 0; | 154 | bn_mulw_addtw(a[0], b[0], 0, 0, 0, &c2, &c1, &r[0]); |
188 | c2 = 0; | 155 | |
189 | c3 = 0; | 156 | bn_mulw_addtw(a[0], b[1], 0, c2, c1, &c2, &c1, &c0); |
190 | mul_add_c(a[0], b[0], c1, c2, c3); | 157 | bn_mulw_addtw(a[1], b[0], c2, c1, c0, &c2, &c1, &r[1]); |
191 | r[0] = c1; | 158 | |
192 | c1 = 0; | 159 | bn_mulw_addtw(a[2], b[0], 0, c2, c1, &c2, &c1, &c0); |
193 | mul_add_c(a[0], b[1], c2, c3, c1); | 160 | bn_mulw_addtw(a[1], b[1], c2, c1, c0, &c2, &c1, &c0); |
194 | mul_add_c(a[1], b[0], c2, c3, c1); | 161 | bn_mulw_addtw(a[0], b[2], c2, c1, c0, &c2, &c1, &r[2]); |
195 | r[1] = c2; | 162 | |
196 | c2 = 0; | 163 | bn_mulw_addtw(a[0], b[3], 0, c2, c1, &c2, &c1, &c0); |
197 | mul_add_c(a[2], b[0], c3, c1, c2); | 164 | bn_mulw_addtw(a[1], b[2], c2, c1, c0, &c2, &c1, &c0); |
198 | mul_add_c(a[1], b[1], c3, c1, c2); | 165 | bn_mulw_addtw(a[2], b[1], c2, c1, c0, &c2, &c1, &c0); |
199 | mul_add_c(a[0], b[2], c3, c1, c2); | 166 | bn_mulw_addtw(a[3], b[0], c2, c1, c0, &c2, &c1, &r[3]); |
200 | r[2] = c3; | 167 | |
201 | c3 = 0; | 168 | bn_mulw_addtw(a[4], b[0], 0, c2, c1, &c2, &c1, &c0); |
202 | mul_add_c(a[0], b[3], c1, c2, c3); | 169 | bn_mulw_addtw(a[3], b[1], c2, c1, c0, &c2, &c1, &c0); |
203 | mul_add_c(a[1], b[2], c1, c2, c3); | 170 | bn_mulw_addtw(a[2], b[2], c2, c1, c0, &c2, &c1, &c0); |
204 | mul_add_c(a[2], b[1], c1, c2, c3); | 171 | bn_mulw_addtw(a[1], b[3], c2, c1, c0, &c2, &c1, &c0); |
205 | mul_add_c(a[3], b[0], c1, c2, c3); | 172 | bn_mulw_addtw(a[0], b[4], c2, c1, c0, &c2, &c1, &r[4]); |
206 | r[3] = c1; | 173 | |
207 | c1 = 0; | 174 | bn_mulw_addtw(a[0], b[5], 0, c2, c1, &c2, &c1, &c0); |
208 | mul_add_c(a[4], b[0], c2, c3, c1); | 175 | bn_mulw_addtw(a[1], b[4], c2, c1, c0, &c2, &c1, &c0); |
209 | mul_add_c(a[3], b[1], c2, c3, c1); | 176 | bn_mulw_addtw(a[2], b[3], c2, c1, c0, &c2, &c1, &c0); |
210 | mul_add_c(a[2], b[2], c2, c3, c1); | 177 | bn_mulw_addtw(a[3], b[2], c2, c1, c0, &c2, &c1, &c0); |
211 | mul_add_c(a[1], b[3], c2, c3, c1); | 178 | bn_mulw_addtw(a[4], b[1], c2, c1, c0, &c2, &c1, &c0); |
212 | mul_add_c(a[0], b[4], c2, c3, c1); | 179 | bn_mulw_addtw(a[5], b[0], c2, c1, c0, &c2, &c1, &r[5]); |
213 | r[4] = c2; | 180 | |
214 | c2 = 0; | 181 | bn_mulw_addtw(a[6], b[0], 0, c2, c1, &c2, &c1, &c0); |
215 | mul_add_c(a[0], b[5], c3, c1, c2); | 182 | bn_mulw_addtw(a[5], b[1], c2, c1, c0, &c2, &c1, &c0); |
216 | mul_add_c(a[1], b[4], c3, c1, c2); | 183 | bn_mulw_addtw(a[4], b[2], c2, c1, c0, &c2, &c1, &c0); |
217 | mul_add_c(a[2], b[3], c3, c1, c2); | 184 | bn_mulw_addtw(a[3], b[3], c2, c1, c0, &c2, &c1, &c0); |
218 | mul_add_c(a[3], b[2], c3, c1, c2); | 185 | bn_mulw_addtw(a[2], b[4], c2, c1, c0, &c2, &c1, &c0); |
219 | mul_add_c(a[4], b[1], c3, c1, c2); | 186 | bn_mulw_addtw(a[1], b[5], c2, c1, c0, &c2, &c1, &c0); |
220 | mul_add_c(a[5], b[0], c3, c1, c2); | 187 | bn_mulw_addtw(a[0], b[6], c2, c1, c0, &c2, &c1, &r[6]); |
221 | r[5] = c3; | 188 | |
222 | c3 = 0; | 189 | bn_mulw_addtw(a[0], b[7], 0, c2, c1, &c2, &c1, &c0); |
223 | mul_add_c(a[6], b[0], c1, c2, c3); | 190 | bn_mulw_addtw(a[1], b[6], c2, c1, c0, &c2, &c1, &c0); |
224 | mul_add_c(a[5], b[1], c1, c2, c3); | 191 | bn_mulw_addtw(a[2], b[5], c2, c1, c0, &c2, &c1, &c0); |
225 | mul_add_c(a[4], b[2], c1, c2, c3); | 192 | bn_mulw_addtw(a[3], b[4], c2, c1, c0, &c2, &c1, &c0); |
226 | mul_add_c(a[3], b[3], c1, c2, c3); | 193 | bn_mulw_addtw(a[4], b[3], c2, c1, c0, &c2, &c1, &c0); |
227 | mul_add_c(a[2], b[4], c1, c2, c3); | 194 | bn_mulw_addtw(a[5], b[2], c2, c1, c0, &c2, &c1, &c0); |
228 | mul_add_c(a[1], b[5], c1, c2, c3); | 195 | bn_mulw_addtw(a[6], b[1], c2, c1, c0, &c2, &c1, &c0); |
229 | mul_add_c(a[0], b[6], c1, c2, c3); | 196 | bn_mulw_addtw(a[7], b[0], c2, c1, c0, &c2, &c1, &r[7]); |
230 | r[6] = c1; | 197 | |
231 | c1 = 0; | 198 | bn_mulw_addtw(a[7], b[1], 0, c2, c1, &c2, &c1, &c0); |
232 | mul_add_c(a[0], b[7], c2, c3, c1); | 199 | bn_mulw_addtw(a[6], b[2], c2, c1, c0, &c2, &c1, &c0); |
233 | mul_add_c(a[1], b[6], c2, c3, c1); | 200 | bn_mulw_addtw(a[5], b[3], c2, c1, c0, &c2, &c1, &c0); |
234 | mul_add_c(a[2], b[5], c2, c3, c1); | 201 | bn_mulw_addtw(a[4], b[4], c2, c1, c0, &c2, &c1, &c0); |
235 | mul_add_c(a[3], b[4], c2, c3, c1); | 202 | bn_mulw_addtw(a[3], b[5], c2, c1, c0, &c2, &c1, &c0); |
236 | mul_add_c(a[4], b[3], c2, c3, c1); | 203 | bn_mulw_addtw(a[2], b[6], c2, c1, c0, &c2, &c1, &c0); |
237 | mul_add_c(a[5], b[2], c2, c3, c1); | 204 | bn_mulw_addtw(a[1], b[7], c2, c1, c0, &c2, &c1, &r[8]); |
238 | mul_add_c(a[6], b[1], c2, c3, c1); | 205 | |
239 | mul_add_c(a[7], b[0], c2, c3, c1); | 206 | bn_mulw_addtw(a[2], b[7], 0, c2, c1, &c2, &c1, &c0); |
240 | r[7] = c2; | 207 | bn_mulw_addtw(a[3], b[6], c2, c1, c0, &c2, &c1, &c0); |
241 | c2 = 0; | 208 | bn_mulw_addtw(a[4], b[5], c2, c1, c0, &c2, &c1, &c0); |
242 | mul_add_c(a[7], b[1], c3, c1, c2); | 209 | bn_mulw_addtw(a[5], b[4], c2, c1, c0, &c2, &c1, &c0); |
243 | mul_add_c(a[6], b[2], c3, c1, c2); | 210 | bn_mulw_addtw(a[6], b[3], c2, c1, c0, &c2, &c1, &c0); |
244 | mul_add_c(a[5], b[3], c3, c1, c2); | 211 | bn_mulw_addtw(a[7], b[2], c2, c1, c0, &c2, &c1, &r[9]); |
245 | mul_add_c(a[4], b[4], c3, c1, c2); | 212 | |
246 | mul_add_c(a[3], b[5], c3, c1, c2); | 213 | bn_mulw_addtw(a[7], b[3], 0, c2, c1, &c2, &c1, &c0); |
247 | mul_add_c(a[2], b[6], c3, c1, c2); | 214 | bn_mulw_addtw(a[6], b[4], c2, c1, c0, &c2, &c1, &c0); |
248 | mul_add_c(a[1], b[7], c3, c1, c2); | 215 | bn_mulw_addtw(a[5], b[5], c2, c1, c0, &c2, &c1, &c0); |
249 | r[8] = c3; | 216 | bn_mulw_addtw(a[4], b[6], c2, c1, c0, &c2, &c1, &c0); |
250 | c3 = 0; | 217 | bn_mulw_addtw(a[3], b[7], c2, c1, c0, &c2, &c1, &r[10]); |
251 | mul_add_c(a[2], b[7], c1, c2, c3); | 218 | |
252 | mul_add_c(a[3], b[6], c1, c2, c3); | 219 | bn_mulw_addtw(a[4], b[7], 0, c2, c1, &c2, &c1, &c0); |
253 | mul_add_c(a[4], b[5], c1, c2, c3); | 220 | bn_mulw_addtw(a[5], b[6], c2, c1, c0, &c2, &c1, &c0); |
254 | mul_add_c(a[5], b[4], c1, c2, c3); | 221 | bn_mulw_addtw(a[6], b[5], c2, c1, c0, &c2, &c1, &c0); |
255 | mul_add_c(a[6], b[3], c1, c2, c3); | 222 | bn_mulw_addtw(a[7], b[4], c2, c1, c0, &c2, &c1, &r[11]); |
256 | mul_add_c(a[7], b[2], c1, c2, c3); | 223 | |
257 | r[9] = c1; | 224 | bn_mulw_addtw(a[7], b[5], 0, c2, c1, &c2, &c1, &c0); |
258 | c1 = 0; | 225 | bn_mulw_addtw(a[6], b[6], c2, c1, c0, &c2, &c1, &c0); |
259 | mul_add_c(a[7], b[3], c2, c3, c1); | 226 | bn_mulw_addtw(a[5], b[7], c2, c1, c0, &c2, &c1, &r[12]); |
260 | mul_add_c(a[6], b[4], c2, c3, c1); | 227 | |
261 | mul_add_c(a[5], b[5], c2, c3, c1); | 228 | bn_mulw_addtw(a[6], b[7], 0, c2, c1, &c2, &c1, &c0); |
262 | mul_add_c(a[4], b[6], c2, c3, c1); | 229 | bn_mulw_addtw(a[7], b[6], c2, c1, c0, &c2, &c1, &r[13]); |
263 | mul_add_c(a[3], b[7], c2, c3, c1); | 230 | |
264 | r[10] = c2; | 231 | bn_mulw_addtw(a[7], b[7], 0, c2, c1, &c2, &r[15], &r[14]); |
265 | c2 = 0; | ||
266 | mul_add_c(a[4], b[7], c3, c1, c2); | ||
267 | mul_add_c(a[5], b[6], c3, c1, c2); | ||
268 | mul_add_c(a[6], b[5], c3, c1, c2); | ||
269 | mul_add_c(a[7], b[4], c3, c1, c2); | ||
270 | r[11] = c3; | ||
271 | c3 = 0; | ||
272 | mul_add_c(a[7], b[5], c1, c2, c3); | ||
273 | mul_add_c(a[6], b[6], c1, c2, c3); | ||
274 | mul_add_c(a[5], b[7], c1, c2, c3); | ||
275 | r[12] = c1; | ||
276 | c1 = 0; | ||
277 | mul_add_c(a[6], b[7], c2, c3, c1); | ||
278 | mul_add_c(a[7], b[6], c2, c3, c1); | ||
279 | r[13] = c2; | ||
280 | c2 = 0; | ||
281 | mul_add_c(a[7], b[7], c3, c1, c2); | ||
282 | r[14] = c3; | ||
283 | r[15] = c1; | ||
284 | } | 232 | } |
285 | #endif | 233 | #endif |
286 | 234 | ||
235 | /* | ||
236 | * bn_mul_words() computes (carry:r[i]) = a[i] * w + carry, where a is an array | ||
237 | * of words and w is a single word. This should really be called bn_mulw_words() | ||
238 | * since only one input is an array. This is used as a step in the multiplication | ||
239 | * of word arrays. | ||
240 | */ | ||
287 | #ifndef HAVE_BN_MUL_WORDS | 241 | #ifndef HAVE_BN_MUL_WORDS |
288 | #if defined(BN_LLONG) || defined(BN_UMULT_HIGH) | ||
289 | |||
290 | BN_ULONG | 242 | BN_ULONG |
291 | bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) | 243 | bn_mul_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w) |
292 | { | ||
293 | BN_ULONG c1 = 0; | ||
294 | |||
295 | assert(num >= 0); | ||
296 | if (num <= 0) | ||
297 | return (c1); | ||
298 | |||
299 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
300 | while (num & ~3) { | ||
301 | mul(rp[0], ap[0], w, c1); | ||
302 | mul(rp[1], ap[1], w, c1); | ||
303 | mul(rp[2], ap[2], w, c1); | ||
304 | mul(rp[3], ap[3], w, c1); | ||
305 | ap += 4; | ||
306 | rp += 4; | ||
307 | num -= 4; | ||
308 | } | ||
309 | #endif | ||
310 | while (num) { | ||
311 | mul(rp[0], ap[0], w, c1); | ||
312 | ap++; | ||
313 | rp++; | ||
314 | num--; | ||
315 | } | ||
316 | return (c1); | ||
317 | } | ||
318 | #else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ | ||
319 | |||
320 | BN_ULONG | ||
321 | bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) | ||
322 | { | 244 | { |
323 | BN_ULONG carry = 0; | 245 | BN_ULONG carry = 0; |
324 | BN_ULONG bl, bh; | ||
325 | 246 | ||
326 | assert(num >= 0); | 247 | assert(num >= 0); |
327 | if (num <= 0) | 248 | if (num <= 0) |
328 | return ((BN_ULONG)0); | 249 | return 0; |
329 | |||
330 | bl = LBITS(w); | ||
331 | bh = HBITS(w); | ||
332 | 250 | ||
333 | #ifndef OPENSSL_SMALL_FOOTPRINT | 251 | #ifndef OPENSSL_SMALL_FOOTPRINT |
334 | while (num & ~3) { | 252 | while (num & ~3) { |
335 | mul(rp[0], ap[0], bl, bh, carry); | 253 | bn_mulw_addw(a[0], w, carry, &carry, &r[0]); |
336 | mul(rp[1], ap[1], bl, bh, carry); | 254 | bn_mulw_addw(a[1], w, carry, &carry, &r[1]); |
337 | mul(rp[2], ap[2], bl, bh, carry); | 255 | bn_mulw_addw(a[2], w, carry, &carry, &r[2]); |
338 | mul(rp[3], ap[3], bl, bh, carry); | 256 | bn_mulw_addw(a[3], w, carry, &carry, &r[3]); |
339 | ap += 4; | 257 | a += 4; |
340 | rp += 4; | 258 | r += 4; |
341 | num -= 4; | 259 | num -= 4; |
342 | } | 260 | } |
343 | #endif | 261 | #endif |
344 | while (num) { | 262 | while (num) { |
345 | mul(rp[0], ap[0], bl, bh, carry); | 263 | bn_mulw_addw(a[0], w, carry, &carry, &r[0]); |
346 | ap++; | 264 | a++; |
347 | rp++; | 265 | r++; |
348 | num--; | 266 | num--; |
349 | } | 267 | } |
350 | return (carry); | 268 | return carry; |
351 | } | 269 | } |
352 | #endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */ | ||
353 | #endif | 270 | #endif |
354 | 271 | ||
355 | #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS) | 272 | #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS) |