summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto/bn/bn_internal.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/libcrypto/bn/bn_internal.h')
-rw-r--r--src/lib/libcrypto/bn/bn_internal.h114
1 files changed, 59 insertions, 55 deletions
diff --git a/src/lib/libcrypto/bn/bn_internal.h b/src/lib/libcrypto/bn/bn_internal.h
index 64240555d1..2872e21185 100644
--- a/src/lib/libcrypto/bn/bn_internal.h
+++ b/src/lib/libcrypto/bn/bn_internal.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_internal.h,v 1.6 2023/02/16 10:02:02 jsing Exp $ */ 1/* $OpenBSD: bn_internal.h,v 1.7 2023/02/16 10:41:03 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2023 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -159,17 +159,21 @@ bn_subw_subw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_borrow,
159} 159}
160#endif 160#endif
161 161
162#ifndef HAVE_BN_UMUL_HILO 162/*
163 * bn_mulw() computes (r1:r0) = a * b, where both inputs are single words,
164 * producing a double word result.
165 */
166#ifndef HAVE_BN_MULW
163#ifdef BN_LLONG 167#ifdef BN_LLONG
164static inline void 168static inline void
165bn_umul_hilo(BN_ULONG a, BN_ULONG b, BN_ULONG *out_h, BN_ULONG *out_l) 169bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
166{ 170{
167 BN_ULLONG r; 171 BN_ULLONG r;
168 172
169 r = (BN_ULLONG)a * (BN_ULLONG)b; 173 r = (BN_ULLONG)a * (BN_ULLONG)b;
170 174
171 *out_h = r >> BN_BITS2; 175 *out_r1 = r >> BN_BITS2;
172 *out_l = r & BN_MASK2; 176 *out_r0 = r & BN_MASK2;
173} 177}
174 178
175#else /* !BN_LLONG */ 179#else /* !BN_LLONG */
@@ -193,38 +197,38 @@ bn_umul_hilo(BN_ULONG a, BN_ULONG b, BN_ULONG *out_h, BN_ULONG *out_l)
193 */ 197 */
194#if 1 198#if 1
195static inline void 199static inline void
196bn_umul_hilo(BN_ULONG a, BN_ULONG b, BN_ULONG *out_h, BN_ULONG *out_l) 200bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
197{ 201{
198 BN_ULONG ah, al, bh, bl, h, l, x, c1, c2; 202 BN_ULONG a1, a0, b1, b0, r1, r0, c1, c2, x;
199 203
200 ah = a >> BN_BITS4; 204 a1 = a >> BN_BITS4;
201 al = a & BN_MASK2l; 205 a0 = a & BN_MASK2l;
202 bh = b >> BN_BITS4; 206 b1 = b >> BN_BITS4;
203 bl = b & BN_MASK2l; 207 b0 = b & BN_MASK2l;
204 208
205 h = ah * bh; 209 r1 = a1 * b1;
206 l = al * bl; 210 r0 = a0 * b0;
207 211
208 /* (ah * bl) << BN_BITS4, partition the result across h:l with carry. */ 212 /* (a1 * b0) << BN_BITS4, partition the result across r1:r0 with carry. */
209 x = ah * bl; 213 x = a1 * b0;
210 h += x >> BN_BITS4; 214 r1 += x >> BN_BITS4;
211 x <<= BN_BITS4; 215 x <<= BN_BITS4;
212 c1 = l | x; 216 c1 = r0 | x;
213 c2 = l & x; 217 c2 = r0 & x;
214 l += x; 218 r0 += x;
215 h += ((c1 & ~l) | c2) >> (BN_BITS2 - 1); /* carry */ 219 r1 += ((c1 & ~r0) | c2) >> (BN_BITS2 - 1); /* carry */
216 220
217 /* (bh * al) << BN_BITS4, partition the result across h:l with carry. */ 221 /* (b1 * a0) << BN_BITS4, partition the result across r1:r0 with carry. */
218 x = bh * al; 222 x = b1 * a0;
219 h += x >> BN_BITS4; 223 r1 += x >> BN_BITS4;
220 x <<= BN_BITS4; 224 x <<= BN_BITS4;
221 c1 = l | x; 225 c1 = r0 | x;
222 c2 = l & x; 226 c2 = r0 & x;
223 l += x; 227 r0 += x;
224 h += ((c1 & ~l) | c2) >> (BN_BITS2 - 1); /* carry */ 228 r1 += ((c1 & ~r0) | c2) >> (BN_BITS2 - 1); /* carry */
225 229
226 *out_h = h; 230 *out_r1 = r1;
227 *out_l = l; 231 *out_r0 = r0;
228} 232}
229#else 233#else
230 234
@@ -236,62 +240,62 @@ bn_umul_hilo(BN_ULONG a, BN_ULONG b, BN_ULONG *out_h, BN_ULONG *out_l)
236 * implementations should eventually be removed. 240 * implementations should eventually be removed.
237 */ 241 */
238static inline void 242static inline void
239bn_umul_hilo(BN_ULONG a, BN_ULONG b, BN_ULONG *out_h, BN_ULONG *out_l) 243bn_mulw(BN_ULONG a, BN_ULONG b, BN_ULONG *out_r1, BN_ULONG *out_r0)
240{ 244{
241 BN_ULONG ah, bh, al, bl, x, h, l; 245 BN_ULONG a1, a0, b1, b0, r1, r0, x;
242 BN_ULONG acc0, acc1, acc2, acc3; 246 BN_ULONG acc0, acc1, acc2, acc3;
243 247
244 ah = a >> BN_BITS4; 248 a1 = a >> BN_BITS4;
245 bh = b >> BN_BITS4; 249 b1 = b >> BN_BITS4;
246 al = a & BN_MASK2l; 250 a0 = a & BN_MASK2l;
247 bl = b & BN_MASK2l; 251 b0 = b & BN_MASK2l;
248 252
249 h = ah * bh; 253 r1 = a1 * b1;
250 l = al * bl; 254 r0 = a0 * b0;
251 255
252 acc0 = l & BN_MASK2l; 256 acc0 = r0 & BN_MASK2l;
253 acc1 = l >> BN_BITS4; 257 acc1 = r0 >> BN_BITS4;
254 acc2 = h & BN_MASK2l; 258 acc2 = r1 & BN_MASK2l;
255 acc3 = h >> BN_BITS4; 259 acc3 = r1 >> BN_BITS4;
256 260
257 /* (ah * bl) << BN_BITS4, partition the result across h:l. */ 261 /* (a1 * b0) << BN_BITS4, partition the result across r1:r0. */
258 x = ah * bl; 262 x = a1 * b0;
259 acc1 += x & BN_MASK2l; 263 acc1 += x & BN_MASK2l;
260 acc2 += (acc1 >> BN_BITS4) + (x >> BN_BITS4); 264 acc2 += (acc1 >> BN_BITS4) + (x >> BN_BITS4);
261 acc1 &= BN_MASK2l; 265 acc1 &= BN_MASK2l;
262 acc3 += acc2 >> BN_BITS4; 266 acc3 += acc2 >> BN_BITS4;
263 acc2 &= BN_MASK2l; 267 acc2 &= BN_MASK2l;
264 268
265 /* (bh * al) << BN_BITS4, partition the result across h:l. */ 269 /* (b1 * a0) << BN_BITS4, partition the result across r1:r0. */
266 x = bh * al; 270 x = b1 * a0;
267 acc1 += x & BN_MASK2l; 271 acc1 += x & BN_MASK2l;
268 acc2 += (acc1 >> BN_BITS4) + (x >> BN_BITS4); 272 acc2 += (acc1 >> BN_BITS4) + (x >> BN_BITS4);
269 acc1 &= BN_MASK2l; 273 acc1 &= BN_MASK2l;
270 acc3 += acc2 >> BN_BITS4; 274 acc3 += acc2 >> BN_BITS4;
271 acc2 &= BN_MASK2l; 275 acc2 &= BN_MASK2l;
272 276
273 *out_h = (acc3 << BN_BITS4) | acc2; 277 *out_r1 = (acc3 << BN_BITS4) | acc2;
274 *out_l = (acc1 << BN_BITS4) | acc0; 278 *out_r0 = (acc1 << BN_BITS4) | acc0;
275} 279}
276#endif 280#endif
277#endif /* !BN_LLONG */ 281#endif /* !BN_LLONG */
278#endif 282#endif
279 283
280#ifndef HAVE_BN_UMUL_LO 284#ifndef HAVE_BN_MULW_LO
281static inline BN_ULONG 285static inline BN_ULONG
282bn_umul_lo(BN_ULONG a, BN_ULONG b) 286bn_mulw_lo(BN_ULONG a, BN_ULONG b)
283{ 287{
284 return a * b; 288 return a * b;
285} 289}
286#endif 290#endif
287 291
288#ifndef HAVE_BN_UMUL_HI 292#ifndef HAVE_BN_MULW_HI
289static inline BN_ULONG 293static inline BN_ULONG
290bn_umul_hi(BN_ULONG a, BN_ULONG b) 294bn_mulw_hi(BN_ULONG a, BN_ULONG b)
291{ 295{
292 BN_ULONG h, l; 296 BN_ULONG h, l;
293 297
294 bn_umul_hilo(a, b, &h, &l); 298 bn_mulw(a, b, &h, &l);
295 299
296 return h; 300 return h;
297} 301}
@@ -308,7 +312,7 @@ bn_mulw_addw(BN_ULONG a, BN_ULONG b, BN_ULONG c, BN_ULONG *out_r1,
308{ 312{
309 BN_ULONG carry, r1, r0; 313 BN_ULONG carry, r1, r0;
310 314
311 bn_umul_hilo(a, b, &r1, &r0); 315 bn_mulw(a, b, &r1, &r0);
312 bn_addw(r0, c, &carry, &r0); 316 bn_addw(r0, c, &carry, &r0);
313 r1 += carry; 317 r1 += carry;
314 318
@@ -350,7 +354,7 @@ bn_mulw_addtw(BN_ULONG a, BN_ULONG b, BN_ULONG c2, BN_ULONG c1, BN_ULONG c0,
350{ 354{
351 BN_ULONG carry, r2, r1, r0, x1, x0; 355 BN_ULONG carry, r2, r1, r0, x1, x0;
352 356
353 bn_umul_hilo(a, b, &x1, &x0); 357 bn_mulw(a, b, &x1, &x0);
354 bn_addw(c0, x0, &carry, &r0); 358 bn_addw(c0, x0, &carry, &r0);
355 x1 += carry; 359 x1 += carry;
356 bn_addw(c1, x1, &carry, &r1); 360 bn_addw(c1, x1, &carry, &r1);