author		jsing <>	2023-02-17 05:30:20 +0000
committer	jsing <>	2023-02-17 05:30:20 +0000
commit		d58b728cbd6aaa07432f9003f6cd5b9687d0eede (patch)
tree		3f07aabc00a657ef33b153b4fe4ae290c8eb374e
parent		b7f1c098b1a50519f08f8112820c6dcf50a9f2f0 (diff)
Remove now unused tangle of mul*/sqr* and BN_UMULT_* macros.
No, I'm not trying to overwhelm you... however, we really no longer need
this clutter.
ok tb@
-rw-r--r--	src/lib/libcrypto/bn/bn_local.h	252
1 files changed, 1 insertions, 251 deletions
diff --git a/src/lib/libcrypto/bn/bn_local.h b/src/lib/libcrypto/bn/bn_local.h
index 51582f9833..6d308218e7 100644
--- a/src/lib/libcrypto/bn/bn_local.h
+++ b/src/lib/libcrypto/bn/bn_local.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: bn_local.h,v 1.10 2023/02/16 11:13:05 jsing Exp $ */
+/* $OpenBSD: bn_local.h,v 1.11 2023/02/17 05:30:20 jsing Exp $ */
 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  * All rights reserved.
  *
@@ -241,256 +241,6 @@ struct bn_gencb_st {
 #define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32) /* 32 */
 #define BN_MONT_CTX_SET_SIZE_WORD (64) /* 32 */
 
-#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
-/*
- * BN_UMULT_HIGH section.
- *
- * No, I'm not trying to overwhelm you when stating that the
- * product of N-bit numbers is 2*N bits wide:-) No, I don't expect
- * you to be impressed when I say that if the compiler doesn't
- * support 2*N integer type, then you have to replace every N*N
- * multiplication with 4 (N/2)*(N/2) accompanied by some shifts
- * and additions which unavoidably results in severe performance
- * penalties. Of course provided that the hardware is capable of
- * producing 2*N result... That's when you normally start
- * considering assembler implementation. However! It should be
- * pointed out that some CPUs (most notably Alpha, PowerPC and
- * upcoming IA-64 family:-) provide *separate* instruction
- * calculating the upper half of the product placing the result
- * into a general purpose register. Now *if* the compiler supports
- * inline assembler, then it's not impossible to implement the
- * "bignum" routines (and have the compiler optimize 'em)
- * exhibiting "native" performance in C. That's what BN_UMULT_HIGH
- * macro is about:-)
- *
- * <appro@fy.chalmers.se>
- */
-# if defined(__alpha)
-#  if defined(__GNUC__) && __GNUC__>=2
-#   define BN_UMULT_HIGH(a,b)	({ \
-	BN_ULONG ret; \
-	asm ("umulh %1,%2,%0" \
-	     : "=r"(ret) \
-	     : "r"(a), "r"(b)); \
-	ret; })
-#  endif /* compiler */
-# elif defined(_ARCH_PPC) && defined(_LP64)
-#  if defined(__GNUC__) && __GNUC__>=2
-#   define BN_UMULT_HIGH(a,b)	({ \
-	BN_ULONG ret; \
-	asm ("mulhdu %0,%1,%2" \
-	     : "=r"(ret) \
-	     : "r"(a), "r"(b)); \
-	ret; })
-#  endif /* compiler */
-# elif defined(__x86_64) || defined(__x86_64__)
-#  if defined(__GNUC__) && __GNUC__>=2
-#   define BN_UMULT_HIGH(a,b)	({ \
-	BN_ULONG ret,discard; \
-	asm ("mulq %3" \
-	     : "=a"(discard),"=d"(ret) \
-	     : "a"(a), "g"(b) \
-	     : "cc"); \
-	ret; })
-#   define BN_UMULT_LOHI(low,high,a,b) \
-	asm ("mulq %3" \
-	     : "=a"(low),"=d"(high) \
-	     : "a"(a),"g"(b) \
-	     : "cc");
-#  endif
-# elif defined(__mips) && defined(_LP64)
-#  if defined(__GNUC__) && __GNUC__>=2
-#   if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4) /* "h" constraint is no more since 4.4 */
-#    define BN_UMULT_HIGH(a,b)	(((__uint128_t)(a)*(b))>>64)
-#    define BN_UMULT_LOHI(low,high,a,b) ({ \
-	__uint128_t ret=(__uint128_t)(a)*(b); \
-	(high)=ret>>64; (low)=ret; })
-#   else
-#    define BN_UMULT_HIGH(a,b)	({ \
-	BN_ULONG ret; \
-	asm ("dmultu %1,%2" \
-	     : "=h"(ret) \
-	     : "r"(a), "r"(b) : "l"); \
-	ret; })
-#    define BN_UMULT_LOHI(low,high,a,b)\
-	asm ("dmultu %2,%3" \
-	     : "=l"(low),"=h"(high) \
-	     : "r"(a), "r"(b));
-#   endif
-#  endif
-# endif /* cpu */
-#endif /* OPENSSL_NO_ASM */
-
-/*************************************************************
- * Using the long long type
- */
-#define Lw(t)	(((BN_ULONG)(t))&BN_MASK2)
-#define Hw(t)	(((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
-
-#ifndef BN_LLONG
-/*************************************************************
- * No long long type
- */
-
-#define LBITS(a)	((a)&BN_MASK2l)
-#define HBITS(a)	(((a)>>BN_BITS4)&BN_MASK2l)
-#define L2HBITS(a)	(((a)<<BN_BITS4)&BN_MASK2)
-
-#define mul64(l,h,bl,bh) \
-	{ \
-	BN_ULONG m,m1,lt,ht; \
-	\
-	lt=l; \
-	ht=h; \
-	m =(bh)*(lt); \
-	lt=(bl)*(lt); \
-	m1=(bl)*(ht); \
-	ht =(bh)*(ht); \
-	m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \
-	ht+=HBITS(m); \
-	m1=L2HBITS(m); \
-	lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \
-	(l)=lt; \
-	(h)=ht; \
-	}
-
-#define sqr64(lo,ho,in) \
-	{ \
-	BN_ULONG l,h,m; \
-	\
-	h=(in); \
-	l=LBITS(h); \
-	h=HBITS(h); \
-	m =(l)*(h); \
-	l*=l; \
-	h*=h; \
-	h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \
-	m =(m&BN_MASK2l)<<(BN_BITS4+1); \
-	l=(l+m)&BN_MASK2; if (l < m) h++; \
-	(lo)=l; \
-	(ho)=h; \
-	}
-
-#endif /* !BN_LLONG */
-
-/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
-/* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
-/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
-
-#ifdef BN_LLONG
-/*
- * Keep in mind that additions to multiplication result can not
- * overflow, because its high half cannot be all-ones.
- */
-
-#define mul_add_c2(a,b,c0,c1,c2) do { \
-	BN_ULONG hi; \
-	BN_ULLONG t = (BN_ULLONG)(a)*(b); \
-	BN_ULLONG tt = t+c0;	/* no carry */ \
-	c0 = (BN_ULONG)Lw(tt); \
-	hi = (BN_ULONG)Hw(tt); \
-	c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
-	t += c0;		/* no carry */ \
-	c0 = (BN_ULONG)Lw(t); \
-	hi = (BN_ULONG)Hw(t); \
-	c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
-	} while(0)
-
-#define sqr_add_c(a,i,c0,c1,c2) do { \
-	BN_ULONG hi; \
-	BN_ULLONG t = (BN_ULLONG)a[i]*a[i]; \
-	t += c0;		/* no carry */ \
-	c0 = (BN_ULONG)Lw(t); \
-	hi = (BN_ULONG)Hw(t); \
-	c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
-	} while(0)
-
-#define sqr_add_c2(a,i,j,c0,c1,c2) \
-	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
-
-#elif defined(BN_UMULT_LOHI)
-/*
- * Keep in mind that additions to hi can not overflow, because
- * the high word of a multiplication result cannot be all-ones.
- */
-
-#define mul_add_c2(a,b,c0,c1,c2) do { \
-	BN_ULONG ta = (a), tb = (b); \
-	BN_ULONG lo, hi, tt; \
-	BN_UMULT_LOHI(lo,hi,ta,tb); \
-	c0 += lo; tt = hi+((c0<lo)?1:0); \
-	c1 += tt; c2 += (c1<tt)?1:0; \
-	c0 += lo; hi += (c0<lo)?1:0; \
-	c1 += hi; c2 += (c1<hi)?1:0; \
-	} while(0)
-
-#define sqr_add_c(a,i,c0,c1,c2) do { \
-	BN_ULONG ta = (a)[i]; \
-	BN_ULONG lo, hi; \
-	BN_UMULT_LOHI(lo,hi,ta,ta); \
-	c0 += lo; hi += (c0<lo)?1:0; \
-	c1 += hi; c2 += (c1<hi)?1:0; \
-	} while(0)
-
-#define sqr_add_c2(a,i,j,c0,c1,c2) \
-	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
-
-#elif defined(BN_UMULT_HIGH)
-/*
- * Keep in mind that additions to hi can not overflow, because
- * the high word of a multiplication result cannot be all-ones.
- */
-
-#define mul_add_c2(a,b,c0,c1,c2) do { \
-	BN_ULONG ta = (a), tb = (b), tt; \
-	BN_ULONG lo = ta * tb; \
-	BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \
-	c0 += lo; tt = hi + ((c0<lo)?1:0); \
-	c1 += tt; c2 += (c1<tt)?1:0; \
-	c0 += lo; hi += (c0<lo)?1:0; \
-	c1 += hi; c2 += (c1<hi)?1:0; \
-	} while(0)
-
-#define sqr_add_c(a,i,c0,c1,c2) do { \
-	BN_ULONG ta = (a)[i]; \
-	BN_ULONG lo = ta * ta; \
-	BN_ULONG hi = BN_UMULT_HIGH(ta,ta); \
-	c0 += lo; hi += (c0<lo)?1:0; \
-	c1 += hi; c2 += (c1<hi)?1:0; \
-	} while(0)
-
-#define sqr_add_c2(a,i,j,c0,c1,c2) \
-	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
-
-#else /* !BN_LLONG */
-/*
- * Keep in mind that additions to hi can not overflow, because
- * the high word of a multiplication result cannot be all-ones.
- */
-
-#define mul_add_c2(a,b,c0,c1,c2) do { \
-	BN_ULONG tt; \
-	BN_ULONG lo = LBITS(a), hi = HBITS(a); \
-	BN_ULONG bl = LBITS(b), bh = HBITS(b); \
-	mul64(lo,hi,bl,bh); \
-	tt = hi; \
-	c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++; \
-	c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++; \
-	c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
-	c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
-	} while(0)
-
-#define sqr_add_c(a,i,c0,c1,c2) do { \
-	BN_ULONG lo, hi; \
-	sqr64(lo,hi,(a)[i]); \
-	c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
-	c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
-	} while(0)
-
-#define sqr_add_c2(a,i,j,c0,c1,c2) \
-	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
-#endif /* !BN_LLONG */
-
 /* The least significant word of a BIGNUM. */
 #define BN_lsw(n)	(((n)->top == 0) ? (BN_ULONG) 0 : (n)->d[0])
 
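A note on what the removed BN_UMULT_HIGH/BN_UMULT_LOHI macros computed: the upper half (and, for LOHI, both halves) of a full BN_ULONG x BN_ULONG product, either via per-architecture inline assembly or, as in the removed mips GCC >= 4.4 branch, via __uint128_t. A minimal portable sketch of the same operation, assuming a 64-bit BN_ULONG and a compiler that provides unsigned __int128; the function names here are illustrative, not part of the library:

	#include <stdint.h>

	typedef uint64_t BN_ULONG;	/* assumption: 64-bit words, as on LP64 targets */

	/* Upper 64 bits of the 128-bit product a*b -- what BN_UMULT_HIGH returned. */
	static inline BN_ULONG
	umult_high(BN_ULONG a, BN_ULONG b)
	{
		return (BN_ULONG)(((unsigned __int128)a * b) >> 64);
	}

	/* Both halves at once -- what BN_UMULT_LOHI produced. */
	static inline void
	umult_lohi(BN_ULONG *lo, BN_ULONG *hi, BN_ULONG a, BN_ULONG b)
	{
		unsigned __int128 t = (unsigned __int128)a * b;

		*hi = (BN_ULONG)(t >> 64);
		*lo = (BN_ULONG)t;
	}

On 64-bit targets a modern compiler typically lowers this to a single widening multiply instruction, which is the effect the hand-written asm variants were after.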
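The removed mul64()/sqr64() macros handled the opposite case described in the removed comment: no double-width integer type, so an N x N multiply is assembled from four (N/2) x (N/2) products plus shifts and carry fixups. A self-contained sketch of that decomposition for 64-bit words using only 64-bit arithmetic (illustrative; the half-word splitting mirrors what LBITS/HBITS/BN_BITS4 did):

	#include <stdint.h>

	/*
	 * Full 64x64 -> 128-bit multiply using only 64-bit operations,
	 * the same half-word decomposition the removed mul64() performed.
	 */
	static void
	mul_full(uint64_t *lo, uint64_t *hi, uint64_t a, uint64_t b)
	{
		uint64_t al = a & 0xffffffffULL, ah = a >> 32;
		uint64_t bl = b & 0xffffffffULL, bh = b >> 32;

		uint64_t ll = al * bl;		/* low  x low  */
		uint64_t lh = al * bh;		/* cross terms */
		uint64_t hl = ah * bl;
		uint64_t hh = ah * bh;		/* high x high */

		/* Sum the cross terms; a wraparound here is worth 2^96. */
		uint64_t mid = lh + hl;
		uint64_t mid_carry = (mid < lh) ? (1ULL << 32) : 0;

		/* Fold the middle into the low word, carrying into the high word. */
		uint64_t l = ll + (mid << 32);
		uint64_t l_carry = (l < ll) ? 1 : 0;

		*lo = l;
		*hi = hh + (mid >> 32) + mid_carry + l_carry;
	}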
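The removed mul_add_c2()/sqr_add_c()/sqr_add_c2() macros accumulated a product into a three-word value (c2,c1,c0), relying on the observation in the removed comments that the high word of a product can never be all-ones, so adding a carry to it cannot overflow. A sketch of the same accumulation with unsigned __int128, again assuming a 64-bit BN_ULONG and using an illustrative name:

	#include <stdint.h>

	typedef uint64_t BN_ULONG;	/* assumption: 64-bit words */

	/* (c2,c1,c0) += 2*a*b -- the operation the removed mul_add_c2() performed. */
	static void
	mul_add_c2_sketch(BN_ULONG a, BN_ULONG b, BN_ULONG *c0, BN_ULONG *c1, BN_ULONG *c2)
	{
		unsigned __int128 t = (unsigned __int128)a * b;
		BN_ULONG lo = (BN_ULONG)t, hi = (BN_ULONG)(t >> 64);
		int i;

		/* Add a*b twice; each pass propagates carries through c1 into c2. */
		for (i = 0; i < 2; i++) {
			BN_ULONG carry;

			*c0 += lo;
			carry = hi + (*c0 < lo);	/* no overflow: hi <= 2^64 - 2 */
			*c1 += carry;
			*c2 += (*c1 < carry);
		}
	}

Adding the product twice is what yields the 2*a*b of mul_add_c2; sqr_add_c is the single-addition version of the same loop body.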