summaryrefslogtreecommitdiff
path: root/src/lib/libcrypto
diff options
context:
space:
mode:
authorjsing <>2023-07-02 13:11:23 +0000
committerjsing <>2023-07-02 13:11:23 +0000
commit1f763186de570f92a5cbf045453f0f71f5e1f9ff (patch)
tree085e6a450eaeaa483d6aff7b5ab2534db11035e3 /src/lib/libcrypto
parent7029728cfe23ff5ba2d324c24ad7f3147408f92c (diff)
downloadopenbsd-1f763186de570f92a5cbf045453f0f71f5e1f9ff.tar.gz
openbsd-1f763186de570f92a5cbf045453f0f71f5e1f9ff.tar.bz2
openbsd-1f763186de570f92a5cbf045453f0f71f5e1f9ff.zip
Replace bn_sqr_words() with bn_sqr_add_words().
In order to implement efficient squaring, we compute the sum of products (omitting the squares), double the sum of products, and then finally compute and add in the squares. However, for reasons unknown, the final calculation was implemented as two separate steps. Replace bn_sqr_words() with bn_sqr_add_words() so that we do the computation in one step, avoiding the need for a temporary BN and removing needless overhead. This gives us a performance gain across most architectures (even with the loss of sse2 on i386, for example). ok tb@
Diffstat (limited to 'src/lib/libcrypto')
-rw-r--r--src/lib/libcrypto/bn/bn_sqr.c58
1 files changed, 23 insertions, 35 deletions
diff --git a/src/lib/libcrypto/bn/bn_sqr.c b/src/lib/libcrypto/bn/bn_sqr.c
index 5ea1bd45b9..2879d34c0e 100644
--- a/src/lib/libcrypto/bn/bn_sqr.c
+++ b/src/lib/libcrypto/bn/bn_sqr.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_sqr.c,v 1.34 2023/06/24 17:06:54 jsing Exp $ */ 1/* $OpenBSD: bn_sqr.c,v 1.35 2023/07/02 13:11:23 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -160,41 +160,45 @@ bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
160} 160}
161#endif 161#endif
162 162
163#ifndef HAVE_BN_SQR_WORDS 163#ifndef HAVE_BN_SQR
164/* 164/*
165 * bn_sqr_words() computes (r[i*2+1]:r[i*2]) = a[i] * a[i]. 165 * bn_sqr_add_words() computes (r[i*2+1]:r[i*2]) = (r[i*2+1]:r[i*2]) + a[i] * a[i].
166 */ 166 */
167void 167static void
168bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) 168bn_sqr_add_words(BN_ULONG *r, const BN_ULONG *a, int n)
169{ 169{
170 BN_ULONG x3, x2, x1, x0;
171 BN_ULONG carry = 0;
172
170 assert(n >= 0); 173 assert(n >= 0);
171 if (n <= 0) 174 if (n <= 0)
172 return; 175 return;
173 176
174#ifndef OPENSSL_SMALL_FOOTPRINT
175 while (n & ~3) { 177 while (n & ~3) {
176 bn_mulw(a[0], a[0], &r[1], &r[0]); 178 bn_mulw(a[0], a[0], &x1, &x0);
177 bn_mulw(a[1], a[1], &r[3], &r[2]); 179 bn_mulw(a[1], a[1], &x3, &x2);
178 bn_mulw(a[2], a[2], &r[5], &r[4]); 180 bn_qwaddqw(x3, x2, x1, x0, r[3], r[2], r[1], r[0], carry,
179 bn_mulw(a[3], a[3], &r[7], &r[6]); 181 &carry, &r[3], &r[2], &r[1], &r[0]);
182 bn_mulw(a[2], a[2], &x1, &x0);
183 bn_mulw(a[3], a[3], &x3, &x2);
184 bn_qwaddqw(x3, x2, x1, x0, r[7], r[6], r[5], r[4], carry,
185 &carry, &r[7], &r[6], &r[5], &r[4]);
186
180 a += 4; 187 a += 4;
181 r += 8; 188 r += 8;
182 n -= 4; 189 n -= 4;
183 } 190 }
184#endif
185 while (n) { 191 while (n) {
186 bn_mulw(a[0], a[0], &r[1], &r[0]); 192 bn_mulw_addw_addw(a[0], a[0], r[0], carry, &carry, &r[0]);
193 bn_addw(r[1], carry, &carry, &r[1]);
187 a++; 194 a++;
188 r += 2; 195 r += 2;
189 n--; 196 n--;
190 } 197 }
191} 198}
192#endif
193 199
194#ifndef HAVE_BN_SQR
195static void 200static void
196bn_sqr_normal(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, 201bn_sqr_normal(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len)
197 BN_ULONG *tmp)
198{ 202{
199 const BN_ULONG *ap; 203 const BN_ULONG *ap;
200 BN_ULONG *rp; 204 BN_ULONG *rp;
@@ -234,8 +238,7 @@ bn_sqr_normal(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len,
234 bn_add_words(r, r, r, r_len); 238 bn_add_words(r, r, r, r_len);
235 239
236 /* Add squares. */ 240 /* Add squares. */
237 bn_sqr_words(tmp, a, a_len); 241 bn_sqr_add_words(r, a, a_len);
238 bn_add_words(r, r, tmp, r_len);
239} 242}
240 243
241/* 244/*
@@ -246,24 +249,9 @@ bn_sqr_normal(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len,
246int 249int
247bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx) 250bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx)
248{ 251{
249 BIGNUM *tmp; 252 bn_sqr_normal(r->d, r_len, a->d, a->top);
250 int ret = 0;
251
252 BN_CTX_start(ctx);
253 253
254 if ((tmp = BN_CTX_get(ctx)) == NULL) 254 return 1;
255 goto err;
256 if (!bn_wexpand(tmp, r_len))
257 goto err;
258
259 bn_sqr_normal(r->d, r_len, a->d, a->top, tmp->d);
260
261 ret = 1;
262
263 err:
264 BN_CTX_end(ctx);
265
266 return ret;
267} 255}
268#endif 256#endif
269 257