summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/lib/libcrypto/bn/asm/x86_64-gcc.c 7
-rw-r--r-- src/lib/libcrypto/bn/bn_asm.c 150
-rw-r--r-- src/lib/libcrypto/bn/bn_local.h 156
3 files changed, 160 insertions, 153 deletions
diff --git a/src/lib/libcrypto/bn/asm/x86_64-gcc.c b/src/lib/libcrypto/bn/asm/x86_64-gcc.c
index e98ffe41e5..c6d6239bc2 100644
--- a/src/lib/libcrypto/bn/asm/x86_64-gcc.c
+++ b/src/lib/libcrypto/bn/asm/x86_64-gcc.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: x86_64-gcc.c,v 1.7 2022/11/26 16:08:51 tb Exp $ */ 1/* $OpenBSD: x86_64-gcc.c,v 1.8 2023/01/20 17:26:03 jsing Exp $ */
2#include "../bn_local.h" 2#include "../bn_local.h"
3/* 3/*
4 * x86_64 BIGNUM accelerator version 0.1, December 2002. 4 * x86_64 BIGNUM accelerator version 0.1, December 2002.
@@ -227,6 +227,11 @@ BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int
227/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ 227/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
228/* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ 228/* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
229 229
230#undef mul_add_c
231#undef mul_add_c2
232#undef sqr_add_c
233#undef sqr_add_c2
234
230/* 235/*
231 * Keep in mind that carrying into high part of multiplication result 236 * Keep in mind that carrying into high part of multiplication result
232 * can not overflow, because it cannot be all-ones. 237 * can not overflow, because it cannot be all-ones.
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c
index e5627cf6de..84063486b3 100644
--- a/src/lib/libcrypto/bn/bn_asm.c
+++ b/src/lib/libcrypto/bn/bn_asm.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_asm.c,v 1.17 2022/11/30 01:47:19 jsing Exp $ */ 1/* $OpenBSD: bn_asm.c,v 1.18 2023/01/20 17:26:03 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -484,154 +484,6 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
484#undef bn_sqr_comba8 484#undef bn_sqr_comba8
485#undef bn_sqr_comba4 485#undef bn_sqr_comba4
486 486
487/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
488/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
489/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
490/* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
491
492#ifdef BN_LLONG
493/*
494 * Keep in mind that additions to multiplication result can not
495 * overflow, because its high half cannot be all-ones.
496 */
497#define mul_add_c(a,b,c0,c1,c2) do { \
498 BN_ULONG hi; \
499 BN_ULLONG t = (BN_ULLONG)(a)*(b); \
500 t += c0; /* no carry */ \
501 c0 = (BN_ULONG)Lw(t); \
502 hi = (BN_ULONG)Hw(t); \
503 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
504 } while(0)
505
506#define mul_add_c2(a,b,c0,c1,c2) do { \
507 BN_ULONG hi; \
508 BN_ULLONG t = (BN_ULLONG)(a)*(b); \
509 BN_ULLONG tt = t+c0; /* no carry */ \
510 c0 = (BN_ULONG)Lw(tt); \
511 hi = (BN_ULONG)Hw(tt); \
512 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
513 t += c0; /* no carry */ \
514 c0 = (BN_ULONG)Lw(t); \
515 hi = (BN_ULONG)Hw(t); \
516 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
517 } while(0)
518
519#define sqr_add_c(a,i,c0,c1,c2) do { \
520 BN_ULONG hi; \
521 BN_ULLONG t = (BN_ULLONG)a[i]*a[i]; \
522 t += c0; /* no carry */ \
523 c0 = (BN_ULONG)Lw(t); \
524 hi = (BN_ULONG)Hw(t); \
525 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
526 } while(0)
527
528#define sqr_add_c2(a,i,j,c0,c1,c2) \
529 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
530
531#elif defined(BN_UMULT_LOHI)
532/*
533 * Keep in mind that additions to hi can not overflow, because
534 * the high word of a multiplication result cannot be all-ones.
535 */
536#define mul_add_c(a,b,c0,c1,c2) do { \
537 BN_ULONG ta = (a), tb = (b); \
538 BN_ULONG lo, hi; \
539 BN_UMULT_LOHI(lo,hi,ta,tb); \
540 c0 += lo; hi += (c0<lo)?1:0; \
541 c1 += hi; c2 += (c1<hi)?1:0; \
542 } while(0)
543
544#define mul_add_c2(a,b,c0,c1,c2) do { \
545 BN_ULONG ta = (a), tb = (b); \
546 BN_ULONG lo, hi, tt; \
547 BN_UMULT_LOHI(lo,hi,ta,tb); \
548 c0 += lo; tt = hi+((c0<lo)?1:0); \
549 c1 += tt; c2 += (c1<tt)?1:0; \
550 c0 += lo; hi += (c0<lo)?1:0; \
551 c1 += hi; c2 += (c1<hi)?1:0; \
552 } while(0)
553
554#define sqr_add_c(a,i,c0,c1,c2) do { \
555 BN_ULONG ta = (a)[i]; \
556 BN_ULONG lo, hi; \
557 BN_UMULT_LOHI(lo,hi,ta,ta); \
558 c0 += lo; hi += (c0<lo)?1:0; \
559 c1 += hi; c2 += (c1<hi)?1:0; \
560 } while(0)
561
562#define sqr_add_c2(a,i,j,c0,c1,c2) \
563 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
564
565#elif defined(BN_UMULT_HIGH)
566/*
567 * Keep in mind that additions to hi can not overflow, because
568 * the high word of a multiplication result cannot be all-ones.
569 */
570#define mul_add_c(a,b,c0,c1,c2) do { \
571 BN_ULONG ta = (a), tb = (b); \
572 BN_ULONG lo = ta * tb; \
573 BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \
574 c0 += lo; hi += (c0<lo)?1:0; \
575 c1 += hi; c2 += (c1<hi)?1:0; \
576 } while(0)
577
578#define mul_add_c2(a,b,c0,c1,c2) do { \
579 BN_ULONG ta = (a), tb = (b), tt; \
580 BN_ULONG lo = ta * tb; \
581 BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \
582 c0 += lo; tt = hi + ((c0<lo)?1:0); \
583 c1 += tt; c2 += (c1<tt)?1:0; \
584 c0 += lo; hi += (c0<lo)?1:0; \
585 c1 += hi; c2 += (c1<hi)?1:0; \
586 } while(0)
587
588#define sqr_add_c(a,i,c0,c1,c2) do { \
589 BN_ULONG ta = (a)[i]; \
590 BN_ULONG lo = ta * ta; \
591 BN_ULONG hi = BN_UMULT_HIGH(ta,ta); \
592 c0 += lo; hi += (c0<lo)?1:0; \
593 c1 += hi; c2 += (c1<hi)?1:0; \
594 } while(0)
595
596#define sqr_add_c2(a,i,j,c0,c1,c2) \
597 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
598
599#else /* !BN_LLONG */
600/*
601 * Keep in mind that additions to hi can not overflow, because
602 * the high word of a multiplication result cannot be all-ones.
603 */
604#define mul_add_c(a,b,c0,c1,c2) do { \
605 BN_ULONG lo = LBITS(a), hi = HBITS(a); \
606 BN_ULONG bl = LBITS(b), bh = HBITS(b); \
607 mul64(lo,hi,bl,bh); \
608 c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
609 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
610 } while(0)
611
612#define mul_add_c2(a,b,c0,c1,c2) do { \
613 BN_ULONG tt; \
614 BN_ULONG lo = LBITS(a), hi = HBITS(a); \
615 BN_ULONG bl = LBITS(b), bh = HBITS(b); \
616 mul64(lo,hi,bl,bh); \
617 tt = hi; \
618 c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++; \
619 c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++; \
620 c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
621 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
622 } while(0)
623
624#define sqr_add_c(a,i,c0,c1,c2) do { \
625 BN_ULONG lo, hi; \
626 sqr64(lo,hi,(a)[i]); \
627 c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
628 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
629 } while(0)
630
631#define sqr_add_c2(a,i,j,c0,c1,c2) \
632 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
633#endif /* !BN_LLONG */
634
635void 487void
636bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) 488bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
637{ 489{
diff --git a/src/lib/libcrypto/bn/bn_local.h b/src/lib/libcrypto/bn/bn_local.h
index 08e7064c5b..74e158d6fd 100644
--- a/src/lib/libcrypto/bn/bn_local.h
+++ b/src/lib/libcrypto/bn/bn_local.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_local.h,v 1.4 2023/01/20 12:16:46 jsing Exp $ */ 1/* $OpenBSD: bn_local.h,v 1.5 2023/01/20 17:26:03 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -481,15 +481,165 @@ struct bn_gencb_st {
481 } 481 }
482#endif /* !BN_LLONG */ 482#endif /* !BN_LLONG */
483 483
484/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
485/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
486/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
487/* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
488
489#ifdef BN_LLONG
490/*
491 * Keep in mind that additions to multiplication result can not
492 * overflow, because its high half cannot be all-ones.
493 */
494#define mul_add_c(a,b,c0,c1,c2) do { \
495 BN_ULONG hi; \
496 BN_ULLONG t = (BN_ULLONG)(a)*(b); \
497 t += c0; /* no carry */ \
498 c0 = (BN_ULONG)Lw(t); \
499 hi = (BN_ULONG)Hw(t); \
500 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
501 } while(0)
502
503#define mul_add_c2(a,b,c0,c1,c2) do { \
504 BN_ULONG hi; \
505 BN_ULLONG t = (BN_ULLONG)(a)*(b); \
506 BN_ULLONG tt = t+c0; /* no carry */ \
507 c0 = (BN_ULONG)Lw(tt); \
508 hi = (BN_ULONG)Hw(tt); \
509 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
510 t += c0; /* no carry */ \
511 c0 = (BN_ULONG)Lw(t); \
512 hi = (BN_ULONG)Hw(t); \
513 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
514 } while(0)
515
516#define sqr_add_c(a,i,c0,c1,c2) do { \
517 BN_ULONG hi; \
518 BN_ULLONG t = (BN_ULLONG)a[i]*a[i]; \
519 t += c0; /* no carry */ \
520 c0 = (BN_ULONG)Lw(t); \
521 hi = (BN_ULONG)Hw(t); \
522 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
523 } while(0)
524
525#define sqr_add_c2(a,i,j,c0,c1,c2) \
526 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
527
528#elif defined(BN_UMULT_LOHI)
529/*
530 * Keep in mind that additions to hi can not overflow, because
531 * the high word of a multiplication result cannot be all-ones.
532 */
533#define mul_add_c(a,b,c0,c1,c2) do { \
534 BN_ULONG ta = (a), tb = (b); \
535 BN_ULONG lo, hi; \
536 BN_UMULT_LOHI(lo,hi,ta,tb); \
537 c0 += lo; hi += (c0<lo)?1:0; \
538 c1 += hi; c2 += (c1<hi)?1:0; \
539 } while(0)
540
541#define mul_add_c2(a,b,c0,c1,c2) do { \
542 BN_ULONG ta = (a), tb = (b); \
543 BN_ULONG lo, hi, tt; \
544 BN_UMULT_LOHI(lo,hi,ta,tb); \
545 c0 += lo; tt = hi+((c0<lo)?1:0); \
546 c1 += tt; c2 += (c1<tt)?1:0; \
547 c0 += lo; hi += (c0<lo)?1:0; \
548 c1 += hi; c2 += (c1<hi)?1:0; \
549 } while(0)
550
551#define sqr_add_c(a,i,c0,c1,c2) do { \
552 BN_ULONG ta = (a)[i]; \
553 BN_ULONG lo, hi; \
554 BN_UMULT_LOHI(lo,hi,ta,ta); \
555 c0 += lo; hi += (c0<lo)?1:0; \
556 c1 += hi; c2 += (c1<hi)?1:0; \
557 } while(0)
558
559#define sqr_add_c2(a,i,j,c0,c1,c2) \
560 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
561
562#elif defined(BN_UMULT_HIGH)
563/*
564 * Keep in mind that additions to hi can not overflow, because
565 * the high word of a multiplication result cannot be all-ones.
566 */
567#define mul_add_c(a,b,c0,c1,c2) do { \
568 BN_ULONG ta = (a), tb = (b); \
569 BN_ULONG lo = ta * tb; \
570 BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \
571 c0 += lo; hi += (c0<lo)?1:0; \
572 c1 += hi; c2 += (c1<hi)?1:0; \
573 } while(0)
574
575#define mul_add_c2(a,b,c0,c1,c2) do { \
576 BN_ULONG ta = (a), tb = (b), tt; \
577 BN_ULONG lo = ta * tb; \
578 BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \
579 c0 += lo; tt = hi + ((c0<lo)?1:0); \
580 c1 += tt; c2 += (c1<tt)?1:0; \
581 c0 += lo; hi += (c0<lo)?1:0; \
582 c1 += hi; c2 += (c1<hi)?1:0; \
583 } while(0)
584
585#define sqr_add_c(a,i,c0,c1,c2) do { \
586 BN_ULONG ta = (a)[i]; \
587 BN_ULONG lo = ta * ta; \
588 BN_ULONG hi = BN_UMULT_HIGH(ta,ta); \
589 c0 += lo; hi += (c0<lo)?1:0; \
590 c1 += hi; c2 += (c1<hi)?1:0; \
591 } while(0)
592
593#define sqr_add_c2(a,i,j,c0,c1,c2) \
594 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
595
596#else /* !BN_LLONG */
597/*
598 * Keep in mind that additions to hi can not overflow, because
599 * the high word of a multiplication result cannot be all-ones.
600 */
601#define mul_add_c(a,b,c0,c1,c2) do { \
602 BN_ULONG lo = LBITS(a), hi = HBITS(a); \
603 BN_ULONG bl = LBITS(b), bh = HBITS(b); \
604 mul64(lo,hi,bl,bh); \
605 c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
606 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
607 } while(0)
608
609#define mul_add_c2(a,b,c0,c1,c2) do { \
610 BN_ULONG tt; \
611 BN_ULONG lo = LBITS(a), hi = HBITS(a); \
612 BN_ULONG bl = LBITS(b), bh = HBITS(b); \
613 mul64(lo,hi,bl,bh); \
614 tt = hi; \
615 c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++; \
616 c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++; \
617 c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
618 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
619 } while(0)
620
621#define sqr_add_c(a,i,c0,c1,c2) do { \
622 BN_ULONG lo, hi; \
623 sqr64(lo,hi,(a)[i]); \
624 c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
625 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
626 } while(0)
627
628#define sqr_add_c2(a,i,j,c0,c1,c2) \
629 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
630#endif /* !BN_LLONG */
631
484/* The least significant word of a BIGNUM. */ 632/* The least significant word of a BIGNUM. */
485#define BN_lsw(n) (((n)->top == 0) ? (BN_ULONG) 0 : (n)->d[0]) 633#define BN_lsw(n) (((n)->top == 0) ? (BN_ULONG) 0 : (n)->d[0])
486 634
487void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb); 635void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb);
488void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
489void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b); 636void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
637void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
638
490void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp); 639void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp);
491void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a);
492void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a); 640void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a);
641void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a);
642
493int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n); 643int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n);
494int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, 644int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b,
495 int cl, int dl); 645 int cl, int dl);