From a50b434b87829ee0d12767c21ae98194684ab720 Mon Sep 17 00:00:00 2001
From: jsing <>
Date: Fri, 20 Jan 2023 17:26:03 +0000
Subject: Move {mul,sqr}_add_c{,2} macros from bn_asm.c to bn_local.h.

These depend on other macros that are already in bn_local.h and this
makes them available to other source files. A lot more clean up will be
needed in the future.

Of course x86_64-gcc.c makes use of the same macro names - sprinkle some
undef in there for the time being.

ok tb@
---
 src/lib/libcrypto/bn/asm/x86_64-gcc.c |   7 +-
 src/lib/libcrypto/bn/bn_asm.c         | 150 +-------------------------------
 src/lib/libcrypto/bn/bn_local.h       | 156 +++++++++++++++++++++++++++++++++-
 3 files changed, 160 insertions(+), 153 deletions(-)

(limited to 'src')

diff --git a/src/lib/libcrypto/bn/asm/x86_64-gcc.c b/src/lib/libcrypto/bn/asm/x86_64-gcc.c
index e98ffe41e5..c6d6239bc2 100644
--- a/src/lib/libcrypto/bn/asm/x86_64-gcc.c
+++ b/src/lib/libcrypto/bn/asm/x86_64-gcc.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: x86_64-gcc.c,v 1.7 2022/11/26 16:08:51 tb Exp $ */
+/* $OpenBSD: x86_64-gcc.c,v 1.8 2023/01/20 17:26:03 jsing Exp $ */
 #include "../bn_local.h"
 /*
  * x86_64 BIGNUM accelerator version 0.1, December 2002.
@@ -227,6 +227,11 @@ BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int
 /* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
 /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
 
+#undef mul_add_c
+#undef mul_add_c2
+#undef sqr_add_c
+#undef sqr_add_c2
+
 /*
  * Keep in mind that carrying into high part of multiplication result
  * can not overflow, because it cannot be all-ones.
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c
index e5627cf6de..84063486b3 100644
--- a/src/lib/libcrypto/bn/bn_asm.c
+++ b/src/lib/libcrypto/bn/bn_asm.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: bn_asm.c,v 1.17 2022/11/30 01:47:19 jsing Exp $ */
+/* $OpenBSD: bn_asm.c,v 1.18 2023/01/20 17:26:03 jsing Exp $ */
 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  * All rights reserved.
  *
@@ -484,154 +484,6 @@ bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n)
 #undef bn_sqr_comba8
 #undef bn_sqr_comba4
 
-/* mul_add_c(a,b,c0,c1,c2)  -- c+=a*b for three word number c=(c2,c1,c0) */
-/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
-/* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
-/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
-
-#ifdef BN_LLONG
-/*
- * Keep in mind that additions to multiplication result can not
- * overflow, because its high half cannot be all-ones.
- */
-#define mul_add_c(a,b,c0,c1,c2)		do {	\
-	BN_ULONG hi;				\
-	BN_ULLONG t = (BN_ULLONG)(a)*(b);	\
-	t += c0;		/* no carry */	\
-	c0 = (BN_ULONG)Lw(t);			\
-	hi = (BN_ULONG)Hw(t);			\
-	c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++;	\
-	} while(0)
-
-#define mul_add_c2(a,b,c0,c1,c2)	do {	\
-	BN_ULONG hi;				\
-	BN_ULLONG t = (BN_ULLONG)(a)*(b);	\
-	BN_ULLONG tt = t+c0;	/* no carry */	\
-	c0 = (BN_ULONG)Lw(tt);			\
-	hi = (BN_ULONG)Hw(tt);			\
-	c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++;	\
-	t += c0;		/* no carry */	\
-	c0 = (BN_ULONG)Lw(t);			\
-	hi = (BN_ULONG)Hw(t);			\
-	c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++;	\
-	} while(0)
-
-#define sqr_add_c(a,i,c0,c1,c2)		do {	\
-	BN_ULONG hi;				\
-	BN_ULLONG t = (BN_ULLONG)a[i]*a[i];	\
-	t += c0;		/* no carry */	\
-	c0 = (BN_ULONG)Lw(t);			\
-	hi = (BN_ULONG)Hw(t);			\
-	c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++;	\
-	} while(0)
-
-#define sqr_add_c2(a,i,j,c0,c1,c2) \
-	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
-
-#elif defined(BN_UMULT_LOHI)
-/*
- * Keep in mind that additions to hi can not overflow, because
- * the high word of a multiplication result cannot be all-ones.
- */
-#define mul_add_c(a,b,c0,c1,c2)		do {	\
-	BN_ULONG ta = (a), tb = (b);		\
-	BN_ULONG lo, hi;			\
-	BN_UMULT_LOHI(lo,hi,ta,tb);		\
-	c0 += lo; hi += (c0<lo)?1:0;		\
-	c1 += hi; c2 += (c1<hi)?1:0;		\
-	} while(0)
-
-#define mul_add_c2(a,b,c0,c1,c2)	do {	\
-	BN_ULONG ta = (a), tb = (b);		\
-	BN_ULONG lo, hi, tt;			\
-	BN_UMULT_LOHI(lo,hi,ta,tb);		\
-	c0 += lo; tt = hi+((c0<lo)?1:0);	\
-	c1 += tt; c2 += (c1<tt)?1:0;		\
-	c0 += lo; hi += (c0<lo)?1:0;		\
-	c1 += hi; c2 += (c1<hi)?1:0;		\
-	} while(0)
-
-#define sqr_add_c(a,i,c0,c1,c2)		do {	\
-	BN_ULONG ta = (a)[i];			\
-	BN_ULONG lo, hi;			\
-	BN_UMULT_LOHI(lo,hi,ta,ta);		\
-	c0 += lo; hi += (c0<lo)?1:0;		\
-	c1 += hi; c2 += (c1<hi)?1:0;		\
-	} while(0)
-
-#define sqr_add_c2(a,i,j,c0,c1,c2)	\
-	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
-
-#elif defined(BN_UMULT_HIGH)
-/*
- * Keep in mind that additions to hi can not overflow, because
- * the high word of a multiplication result cannot be all-ones.
- */
-#define mul_add_c(a,b,c0,c1,c2)		do {	\
-	BN_ULONG ta = (a), tb = (b);		\
-	BN_ULONG lo = ta * tb;			\
-	BN_ULONG hi = BN_UMULT_HIGH(ta,tb);	\
-	c0 += lo; hi += (c0<lo)?1:0;		\
-	c1 += hi; c2 += (c1<hi)?1:0;		\
-	} while(0)
-
-#define mul_add_c2(a,b,c0,c1,c2)	do {	\
-	BN_ULONG ta = (a), tb = (b), tt;	\
-	BN_ULONG lo = ta * tb;			\
-	BN_ULONG hi = BN_UMULT_HIGH(ta,tb);	\
-	c0 += lo; tt = hi + ((c0<lo)?1:0);	\
-	c1 += tt; c2 += (c1<tt)?1:0;		\
-	c0 += lo; hi += (c0<lo)?1:0;		\
-	c1 += hi; c2 += (c1<hi)?1:0;		\
-	} while(0)
-
-#define sqr_add_c(a,i,c0,c1,c2)		do {	\
-	BN_ULONG ta = (a)[i];			\
-	BN_ULONG lo = ta * ta;			\
-	BN_ULONG hi = BN_UMULT_HIGH(ta,ta);	\
-	c0 += lo; hi += (c0<lo)?1:0;		\
-	c1 += hi; c2 += (c1<hi)?1:0;		\
-	} while(0)
-
-#define sqr_add_c2(a,i,j,c0,c1,c2)	\
-	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
-
-#else /* !BN_LLONG */
-/*
- * Keep in mind that additions to hi can not overflow, because
- * the high word of a multiplication result cannot be all-ones.
- */
-#define mul_add_c(a,b,c0,c1,c2)		do {	\
-	BN_ULONG lo = LBITS(a), hi = HBITS(a);	\
-	BN_ULONG bl = LBITS(b), bh = HBITS(b);	\
-	mul64(lo,hi,bl,bh);			\
-	c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++;	\
-	c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++;	\
-	} while(0)
-
-#define mul_add_c2(a,b,c0,c1,c2)	do {	\
-	BN_ULONG tt;				\
-	BN_ULONG lo = LBITS(a), hi = HBITS(a);	\
-	BN_ULONG bl = LBITS(b), bh = HBITS(b);	\
-	mul64(lo,hi,bl,bh);			\
-	tt = hi;				\
-	c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++;	\
-	c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++;	\
-	c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++;	\
-	c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++;	\
-	} while(0)
-
-#define sqr_add_c(a,i,c0,c1,c2)		do {	\
-	BN_ULONG lo, hi;			\
-	sqr64(lo,hi,(a)[i]);			\
-	c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++;	\
-	c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++;	\
-	} while(0)
-
-#define sqr_add_c2(a,i,j,c0,c1,c2) \
-	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
-#endif /* !BN_LLONG */
-
 void
 bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
 {
diff --git a/src/lib/libcrypto/bn/bn_local.h b/src/lib/libcrypto/bn/bn_local.h
index 08e7064c5b..74e158d6fd 100644
--- a/src/lib/libcrypto/bn/bn_local.h
+++ b/src/lib/libcrypto/bn/bn_local.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: bn_local.h,v 1.4 2023/01/20 12:16:46 jsing Exp $ */
+/* $OpenBSD: bn_local.h,v 1.5 2023/01/20 17:26:03 jsing Exp $ */
 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  * All rights reserved.
  *
@@ -481,15 +481,165 @@ struct bn_gencb_st {
 	}
 #endif /* !BN_LLONG */
 
+/* mul_add_c(a,b,c0,c1,c2)  -- c+=a*b for three word number c=(c2,c1,c0) */
+/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
+/* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
+/* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
+
+#ifdef BN_LLONG
+/*
+ * Keep in mind that additions to multiplication result can not
+ * overflow, because its high half cannot be all-ones.
+ */
+#define mul_add_c(a,b,c0,c1,c2)		do {	\
+	BN_ULONG hi;				\
+	BN_ULLONG t = (BN_ULLONG)(a)*(b);	\
+	t += c0;		/* no carry */	\
+	c0 = (BN_ULONG)Lw(t);			\
+	hi = (BN_ULONG)Hw(t);			\
+	c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++;	\
+	} while(0)
+
+#define mul_add_c2(a,b,c0,c1,c2)	do {	\
+	BN_ULONG hi;				\
+	BN_ULLONG t = (BN_ULLONG)(a)*(b);	\
+	BN_ULLONG tt = t+c0;	/* no carry */	\
+	c0 = (BN_ULONG)Lw(tt);			\
+	hi = (BN_ULONG)Hw(tt);			\
+	c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++;	\
+	t += c0;		/* no carry */	\
+	c0 = (BN_ULONG)Lw(t);			\
+	hi = (BN_ULONG)Hw(t);			\
+	c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++;	\
+	} while(0)
+
+#define sqr_add_c(a,i,c0,c1,c2)		do {	\
+	BN_ULONG hi;				\
+	BN_ULLONG t = (BN_ULLONG)(a)[i]*(a)[i];	/* parenthesized like the other variants */ \
+	t += c0;		/* no carry */	\
+	c0 = (BN_ULONG)Lw(t);			\
+	hi = (BN_ULONG)Hw(t);			\
+	c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++;	/* wrap of c1 carries into c2 */ \
+	} while(0)
+
+#define sqr_add_c2(a,i,j,c0,c1,c2) \
+	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
+
+#elif defined(BN_UMULT_LOHI)
+/*
+ * Keep in mind that additions to hi can not overflow, because
+ * the high word of a multiplication result cannot be all-ones.
+ */
+#define mul_add_c(a,b,c0,c1,c2)		do {	\
+	BN_ULONG ta = (a), tb = (b);		\
+	BN_ULONG lo, hi;			\
+	BN_UMULT_LOHI(lo,hi,ta,tb);		\
+	c0 += lo; hi += (c0<lo)?1:0;		\
+	c1 += hi; c2 += (c1<hi)?1:0;		\
+	} while(0)
+
+#define mul_add_c2(a,b,c0,c1,c2)	do {	\
+	BN_ULONG ta = (a), tb = (b);		\
+	BN_ULONG lo, hi, tt;			\
+	BN_UMULT_LOHI(lo,hi,ta,tb);		\
+	c0 += lo; tt = hi+((c0<lo)?1:0);	\
+	c1 += tt; c2 += (c1<tt)?1:0;		\
+	c0 += lo; hi += (c0<lo)?1:0;		\
+	c1 += hi; c2 += (c1<hi)?1:0;		\
+	} while(0)
+
+#define sqr_add_c(a,i,c0,c1,c2)		do {	\
+	BN_ULONG ta = (a)[i];			\
+	BN_ULONG lo, hi;			\
+	BN_UMULT_LOHI(lo,hi,ta,ta);		\
+	c0 += lo; hi += (c0<lo)?1:0;		\
+	c1 += hi; c2 += (c1<hi)?1:0;		\
+	} while(0)
+
+#define sqr_add_c2(a,i,j,c0,c1,c2)	\
+	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
+
+#elif defined(BN_UMULT_HIGH)
+/*
+ * Keep in mind that additions to hi can not overflow, because
+ * the high word of a multiplication result cannot be all-ones.
+ */
+#define mul_add_c(a,b,c0,c1,c2)		do {	\
+	BN_ULONG ta = (a), tb = (b);		\
+	BN_ULONG lo = ta * tb;			\
+	BN_ULONG hi = BN_UMULT_HIGH(ta,tb);	\
+	c0 += lo; hi += (c0<lo)?1:0;		\
+	c1 += hi; c2 += (c1<hi)?1:0;		\
+	} while(0)
+
+#define mul_add_c2(a,b,c0,c1,c2)	do {	\
+	BN_ULONG ta = (a), tb = (b), tt;	\
+	BN_ULONG lo = ta * tb;			\
+	BN_ULONG hi = BN_UMULT_HIGH(ta,tb);	\
+	c0 += lo; tt = hi + ((c0<lo)?1:0);	\
+	c1 += tt; c2 += (c1<tt)?1:0;		\
+	c0 += lo; hi += (c0<lo)?1:0;		\
+	c1 += hi; c2 += (c1<hi)?1:0;		\
+	} while(0)
+
+#define sqr_add_c(a,i,c0,c1,c2)		do {	\
+	BN_ULONG ta = (a)[i];			\
+	BN_ULONG lo = ta * ta;			\
+	BN_ULONG hi = BN_UMULT_HIGH(ta,ta);	\
+	c0 += lo; hi += (c0<lo)?1:0;		\
+	c1 += hi; c2 += (c1<hi)?1:0;		\
+	} while(0)
+
+#define sqr_add_c2(a,i,j,c0,c1,c2)	\
+	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
+
+#else /* !BN_LLONG && !BN_UMULT_LOHI && !BN_UMULT_HIGH */
+/*
+ * Keep in mind that additions to hi can not overflow, because
+ * the high word of a multiplication result cannot be all-ones.
+ */
+#define mul_add_c(a,b,c0,c1,c2)		do {	\
+	BN_ULONG lo = LBITS(a), hi = HBITS(a);	\
+	BN_ULONG bl = LBITS(b), bh = HBITS(b);	\
+	mul64(lo,hi,bl,bh);			\
+	c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++;	\
+	c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++;	\
+	} while(0)
+
+#define mul_add_c2(a,b,c0,c1,c2)	do {	\
+	BN_ULONG tt;				\
+	BN_ULONG lo = LBITS(a), hi = HBITS(a);	\
+	BN_ULONG bl = LBITS(b), bh = HBITS(b);	\
+	mul64(lo,hi,bl,bh);			\
+	tt = hi;				\
+	c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++;	\
+	c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++;	\
+	c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++;	\
+	c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++;	\
+	} while(0)
+
+#define sqr_add_c(a,i,c0,c1,c2)		do {	\
+	BN_ULONG lo, hi;			\
+	sqr64(lo,hi,(a)[i]);			\
+	c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++;	\
+	c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++;	\
+	} while(0)
+
+#define sqr_add_c2(a,i,j,c0,c1,c2) \
+	mul_add_c2((a)[i],(a)[j],c0,c1,c2)
+#endif /* !BN_LLONG && !BN_UMULT_LOHI && !BN_UMULT_HIGH */
+
 /* The least significant word of a BIGNUM. */
 #define BN_lsw(n) (((n)->top == 0) ? (BN_ULONG) 0 : (n)->d[0])
 
 void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb);
-void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
 void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
+void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
+
 void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp);
-void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a);
 void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a);
+void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a);
+
 int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n);
 int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b,
     int cl, int dl);
-- 
cgit v1.2.3-55-g6feb