summaryrefslogtreecommitdiff
path: root/src/lib
diff options
context:
space:
mode:
authorjsing <>2023-02-17 05:30:20 +0000
committerjsing <>2023-02-17 05:30:20 +0000
commitd58b728cbd6aaa07432f9003f6cd5b9687d0eede (patch)
tree3f07aabc00a657ef33b153b4fe4ae290c8eb374e /src/lib
parentb7f1c098b1a50519f08f8112820c6dcf50a9f2f0 (diff)
downloadopenbsd-d58b728cbd6aaa07432f9003f6cd5b9687d0eede.tar.gz
openbsd-d58b728cbd6aaa07432f9003f6cd5b9687d0eede.tar.bz2
openbsd-d58b728cbd6aaa07432f9003f6cd5b9687d0eede.zip
Remove now unused tangle of mul*/sqr* and BN_UMULT_* macros.
No, I'm not trying to overwhelm you... however, we really no longer need this clutter. ok tb@
Diffstat (limited to 'src/lib')
-rw-r--r--src/lib/libcrypto/bn/bn_local.h252
1 file changed, 1 insertion, 251 deletions
diff --git a/src/lib/libcrypto/bn/bn_local.h b/src/lib/libcrypto/bn/bn_local.h
index 51582f9833..6d308218e7 100644
--- a/src/lib/libcrypto/bn/bn_local.h
+++ b/src/lib/libcrypto/bn/bn_local.h
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_local.h,v 1.10 2023/02/16 11:13:05 jsing Exp $ */ 1/* $OpenBSD: bn_local.h,v 1.11 2023/02/17 05:30:20 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -241,256 +241,6 @@ struct bn_gencb_st {
241#define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32) /* 32 */ 241#define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32) /* 32 */
242#define BN_MONT_CTX_SET_SIZE_WORD (64) /* 32 */ 242#define BN_MONT_CTX_SET_SIZE_WORD (64) /* 32 */
243 243
244#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
245/*
246 * BN_UMULT_HIGH section.
247 *
248 * No, I'm not trying to overwhelm you when stating that the
249 * product of N-bit numbers is 2*N bits wide:-) No, I don't expect
250 * you to be impressed when I say that if the compiler doesn't
251 * support 2*N integer type, then you have to replace every N*N
252 * multiplication with 4 (N/2)*(N/2) accompanied by some shifts
253 * and additions which unavoidably results in severe performance
254 * penalties. Of course provided that the hardware is capable of
255 * producing 2*N result... That's when you normally start
256 * considering assembler implementation. However! It should be
257 * pointed out that some CPUs (most notably Alpha, PowerPC and
258 * upcoming IA-64 family:-) provide *separate* instruction
259 * calculating the upper half of the product placing the result
260 * into a general purpose register. Now *if* the compiler supports
261 * inline assembler, then it's not impossible to implement the
262 * "bignum" routines (and have the compiler optimize 'em)
263 * exhibiting "native" performance in C. That's what BN_UMULT_HIGH
264 * macro is about:-)
265 *
266 * <appro@fy.chalmers.se>
267 */
268# if defined(__alpha)
269# if defined(__GNUC__) && __GNUC__>=2
270# define BN_UMULT_HIGH(a,b) ({ \
271 BN_ULONG ret; \
272 asm ("umulh %1,%2,%0" \
273 : "=r"(ret) \
274 : "r"(a), "r"(b)); \
275 ret; })
276# endif /* compiler */
277# elif defined(_ARCH_PPC) && defined(_LP64)
278# if defined(__GNUC__) && __GNUC__>=2
279# define BN_UMULT_HIGH(a,b) ({ \
280 BN_ULONG ret; \
281 asm ("mulhdu %0,%1,%2" \
282 : "=r"(ret) \
283 : "r"(a), "r"(b)); \
284 ret; })
285# endif /* compiler */
286# elif defined(__x86_64) || defined(__x86_64__)
287# if defined(__GNUC__) && __GNUC__>=2
288# define BN_UMULT_HIGH(a,b) ({ \
289 BN_ULONG ret,discard; \
290 asm ("mulq %3" \
291 : "=a"(discard),"=d"(ret) \
292 : "a"(a), "g"(b) \
293 : "cc"); \
294 ret; })
295# define BN_UMULT_LOHI(low,high,a,b) \
296 asm ("mulq %3" \
297 : "=a"(low),"=d"(high) \
298 : "a"(a),"g"(b) \
299 : "cc");
300# endif
301# elif defined(__mips) && defined(_LP64)
302# if defined(__GNUC__) && __GNUC__>=2
303# if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4) /* "h" constraint is no more since 4.4 */
304# define BN_UMULT_HIGH(a,b) (((__uint128_t)(a)*(b))>>64)
305# define BN_UMULT_LOHI(low,high,a,b) ({ \
306 __uint128_t ret=(__uint128_t)(a)*(b); \
307 (high)=ret>>64; (low)=ret; })
308# else
309# define BN_UMULT_HIGH(a,b) ({ \
310 BN_ULONG ret; \
311 asm ("dmultu %1,%2" \
312 : "=h"(ret) \
313 : "r"(a), "r"(b) : "l"); \
314 ret; })
315# define BN_UMULT_LOHI(low,high,a,b)\
316 asm ("dmultu %2,%3" \
317 : "=l"(low),"=h"(high) \
318 : "r"(a), "r"(b));
319# endif
320# endif
321# endif /* cpu */
322#endif /* OPENSSL_NO_ASM */
323
324/*************************************************************
325 * Using the long long type
326 */
327#define Lw(t) (((BN_ULONG)(t))&BN_MASK2)
328#define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
329
330#ifndef BN_LLONG
331/*************************************************************
332 * No long long type
333 */
334
335#define LBITS(a) ((a)&BN_MASK2l)
336#define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
337#define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
338
339#define mul64(l,h,bl,bh) \
340 { \
341 BN_ULONG m,m1,lt,ht; \
342 \
343 lt=l; \
344 ht=h; \
345 m =(bh)*(lt); \
346 lt=(bl)*(lt); \
347 m1=(bl)*(ht); \
348 ht =(bh)*(ht); \
349 m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \
350 ht+=HBITS(m); \
351 m1=L2HBITS(m); \
352 lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \
353 (l)=lt; \
354 (h)=ht; \
355 }
356
357#define sqr64(lo,ho,in) \
358 { \
359 BN_ULONG l,h,m; \
360 \
361 h=(in); \
362 l=LBITS(h); \
363 h=HBITS(h); \
364 m =(l)*(h); \
365 l*=l; \
366 h*=h; \
367 h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \
368 m =(m&BN_MASK2l)<<(BN_BITS4+1); \
369 l=(l+m)&BN_MASK2; if (l < m) h++; \
370 (lo)=l; \
371 (ho)=h; \
372 }
373
374#endif /* !BN_LLONG */
375
376/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
377/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
378/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
379
380#ifdef BN_LLONG
381/*
382 * Keep in mind that additions to multiplication result can not
383 * overflow, because its high half cannot be all-ones.
384 */
385
386#define mul_add_c2(a,b,c0,c1,c2) do { \
387 BN_ULONG hi; \
388 BN_ULLONG t = (BN_ULLONG)(a)*(b); \
389 BN_ULLONG tt = t+c0; /* no carry */ \
390 c0 = (BN_ULONG)Lw(tt); \
391 hi = (BN_ULONG)Hw(tt); \
392 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
393 t += c0; /* no carry */ \
394 c0 = (BN_ULONG)Lw(t); \
395 hi = (BN_ULONG)Hw(t); \
396 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
397 } while(0)
398
399#define sqr_add_c(a,i,c0,c1,c2) do { \
400 BN_ULONG hi; \
401 BN_ULLONG t = (BN_ULLONG)a[i]*a[i]; \
402 t += c0; /* no carry */ \
403 c0 = (BN_ULONG)Lw(t); \
404 hi = (BN_ULONG)Hw(t); \
405 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
406 } while(0)
407
408#define sqr_add_c2(a,i,j,c0,c1,c2) \
409 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
410
411#elif defined(BN_UMULT_LOHI)
412/*
413 * Keep in mind that additions to hi can not overflow, because
414 * the high word of a multiplication result cannot be all-ones.
415 */
416
417#define mul_add_c2(a,b,c0,c1,c2) do { \
418 BN_ULONG ta = (a), tb = (b); \
419 BN_ULONG lo, hi, tt; \
420 BN_UMULT_LOHI(lo,hi,ta,tb); \
421 c0 += lo; tt = hi+((c0<lo)?1:0); \
422 c1 += tt; c2 += (c1<tt)?1:0; \
423 c0 += lo; hi += (c0<lo)?1:0; \
424 c1 += hi; c2 += (c1<hi)?1:0; \
425 } while(0)
426
427#define sqr_add_c(a,i,c0,c1,c2) do { \
428 BN_ULONG ta = (a)[i]; \
429 BN_ULONG lo, hi; \
430 BN_UMULT_LOHI(lo,hi,ta,ta); \
431 c0 += lo; hi += (c0<lo)?1:0; \
432 c1 += hi; c2 += (c1<hi)?1:0; \
433 } while(0)
434
435#define sqr_add_c2(a,i,j,c0,c1,c2) \
436 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
437
438#elif defined(BN_UMULT_HIGH)
439/*
440 * Keep in mind that additions to hi can not overflow, because
441 * the high word of a multiplication result cannot be all-ones.
442 */
443
444#define mul_add_c2(a,b,c0,c1,c2) do { \
445 BN_ULONG ta = (a), tb = (b), tt; \
446 BN_ULONG lo = ta * tb; \
447 BN_ULONG hi = BN_UMULT_HIGH(ta,tb); \
448 c0 += lo; tt = hi + ((c0<lo)?1:0); \
449 c1 += tt; c2 += (c1<tt)?1:0; \
450 c0 += lo; hi += (c0<lo)?1:0; \
451 c1 += hi; c2 += (c1<hi)?1:0; \
452 } while(0)
453
454#define sqr_add_c(a,i,c0,c1,c2) do { \
455 BN_ULONG ta = (a)[i]; \
456 BN_ULONG lo = ta * ta; \
457 BN_ULONG hi = BN_UMULT_HIGH(ta,ta); \
458 c0 += lo; hi += (c0<lo)?1:0; \
459 c1 += hi; c2 += (c1<hi)?1:0; \
460 } while(0)
461
462#define sqr_add_c2(a,i,j,c0,c1,c2) \
463 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
464
465#else /* !BN_LLONG */
466/*
467 * Keep in mind that additions to hi can not overflow, because
468 * the high word of a multiplication result cannot be all-ones.
469 */
470
471#define mul_add_c2(a,b,c0,c1,c2) do { \
472 BN_ULONG tt; \
473 BN_ULONG lo = LBITS(a), hi = HBITS(a); \
474 BN_ULONG bl = LBITS(b), bh = HBITS(b); \
475 mul64(lo,hi,bl,bh); \
476 tt = hi; \
477 c0 = (c0+lo)&BN_MASK2; if (c0<lo) tt++; \
478 c1 = (c1+tt)&BN_MASK2; if (c1<tt) c2++; \
479 c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
480 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
481 } while(0)
482
483#define sqr_add_c(a,i,c0,c1,c2) do { \
484 BN_ULONG lo, hi; \
485 sqr64(lo,hi,(a)[i]); \
486 c0 = (c0+lo)&BN_MASK2; if (c0<lo) hi++; \
487 c1 = (c1+hi)&BN_MASK2; if (c1<hi) c2++; \
488 } while(0)
489
490#define sqr_add_c2(a,i,j,c0,c1,c2) \
491 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
492#endif /* !BN_LLONG */
493
494/* The least significant word of a BIGNUM. */ 244/* The least significant word of a BIGNUM. */
495#define BN_lsw(n) (((n)->top == 0) ? (BN_ULONG) 0 : (n)->d[0]) 245#define BN_lsw(n) (((n)->top == 0) ? (BN_ULONG) 0 : (n)->d[0])
496 246