diff options
author | jsing <> | 2023-07-02 13:11:23 +0000 |
---|---|---|
committer | jsing <> | 2023-07-02 13:11:23 +0000 |
commit | 1f763186de570f92a5cbf045453f0f71f5e1f9ff (patch) | |
tree | 085e6a450eaeaa483d6aff7b5ab2534db11035e3 /src/lib/libcrypto | |
parent | 7029728cfe23ff5ba2d324c24ad7f3147408f92c (diff) | |
download | openbsd-1f763186de570f92a5cbf045453f0f71f5e1f9ff.tar.gz openbsd-1f763186de570f92a5cbf045453f0f71f5e1f9ff.tar.bz2 openbsd-1f763186de570f92a5cbf045453f0f71f5e1f9ff.zip |
Replace bn_sqr_words() with bn_sqr_add_words().
In order to implement efficient squaring, we compute the sum of products
(omitting the squares), double the sum of products and then finally
compute and add in the squares. However, for reasons unknown, the final
calculation was implemented as two separate steps: bn_sqr_words() wrote the
squares to a temporary, which bn_add_words() then added to the result.
Replace bn_sqr_words() with bn_sqr_add_words() such that we do the
computation in one step, avoid the need for a temporary BN and remove
needless overhead. This gives us a performance gain across most
architectures (even with the loss of sse2 on i386, for example).
ok tb@
Diffstat (limited to 'src/lib/libcrypto')
-rw-r--r-- | src/lib/libcrypto/bn/bn_sqr.c | 58 |
1 file changed, 23 insertions, 35 deletions
diff --git a/src/lib/libcrypto/bn/bn_sqr.c b/src/lib/libcrypto/bn/bn_sqr.c index 5ea1bd45b9..2879d34c0e 100644 --- a/src/lib/libcrypto/bn/bn_sqr.c +++ b/src/lib/libcrypto/bn/bn_sqr.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_sqr.c,v 1.34 2023/06/24 17:06:54 jsing Exp $ */ | 1 | /* $OpenBSD: bn_sqr.c,v 1.35 2023/07/02 13:11:23 jsing Exp $ */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. | 3 | * All rights reserved. |
4 | * | 4 | * |
@@ -160,41 +160,45 @@ bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) | |||
160 | } | 160 | } |
161 | #endif | 161 | #endif |
162 | 162 | ||
163 | #ifndef HAVE_BN_SQR_WORDS | 163 | #ifndef HAVE_BN_SQR |
164 | /* | 164 | /* |
165 | * bn_sqr_words() computes (r[i*2+1]:r[i*2]) = a[i] * a[i]. | 165 | * bn_sqr_add_words() computes (r[i*2+1]:r[i*2]) = (r[i*2+1]:r[i*2]) + a[i] * a[i]. |
166 | */ | 166 | */ |
167 | void | 167 | static void |
168 | bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) | 168 | bn_sqr_add_words(BN_ULONG *r, const BN_ULONG *a, int n) |
169 | { | 169 | { |
170 | BN_ULONG x3, x2, x1, x0; | ||
171 | BN_ULONG carry = 0; | ||
172 | |||
170 | assert(n >= 0); | 173 | assert(n >= 0); |
171 | if (n <= 0) | 174 | if (n <= 0) |
172 | return; | 175 | return; |
173 | 176 | ||
174 | #ifndef OPENSSL_SMALL_FOOTPRINT | ||
175 | while (n & ~3) { | 177 | while (n & ~3) { |
176 | bn_mulw(a[0], a[0], &r[1], &r[0]); | 178 | bn_mulw(a[0], a[0], &x1, &x0); |
177 | bn_mulw(a[1], a[1], &r[3], &r[2]); | 179 | bn_mulw(a[1], a[1], &x3, &x2); |
178 | bn_mulw(a[2], a[2], &r[5], &r[4]); | 180 | bn_qwaddqw(x3, x2, x1, x0, r[3], r[2], r[1], r[0], carry, |
179 | bn_mulw(a[3], a[3], &r[7], &r[6]); | 181 | &carry, &r[3], &r[2], &r[1], &r[0]); |
182 | bn_mulw(a[2], a[2], &x1, &x0); | ||
183 | bn_mulw(a[3], a[3], &x3, &x2); | ||
184 | bn_qwaddqw(x3, x2, x1, x0, r[7], r[6], r[5], r[4], carry, | ||
185 | &carry, &r[7], &r[6], &r[5], &r[4]); | ||
186 | |||
180 | a += 4; | 187 | a += 4; |
181 | r += 8; | 188 | r += 8; |
182 | n -= 4; | 189 | n -= 4; |
183 | } | 190 | } |
184 | #endif | ||
185 | while (n) { | 191 | while (n) { |
186 | bn_mulw(a[0], a[0], &r[1], &r[0]); | 192 | bn_mulw_addw_addw(a[0], a[0], r[0], carry, &carry, &r[0]); |
193 | bn_addw(r[1], carry, &carry, &r[1]); | ||
187 | a++; | 194 | a++; |
188 | r += 2; | 195 | r += 2; |
189 | n--; | 196 | n--; |
190 | } | 197 | } |
191 | } | 198 | } |
192 | #endif | ||
193 | 199 | ||
194 | #ifndef HAVE_BN_SQR | ||
195 | static void | 200 | static void |
196 | bn_sqr_normal(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, | 201 | bn_sqr_normal(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len) |
197 | BN_ULONG *tmp) | ||
198 | { | 202 | { |
199 | const BN_ULONG *ap; | 203 | const BN_ULONG *ap; |
200 | BN_ULONG *rp; | 204 | BN_ULONG *rp; |
@@ -234,8 +238,7 @@ bn_sqr_normal(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, | |||
234 | bn_add_words(r, r, r, r_len); | 238 | bn_add_words(r, r, r, r_len); |
235 | 239 | ||
236 | /* Add squares. */ | 240 | /* Add squares. */ |
237 | bn_sqr_words(tmp, a, a_len); | 241 | bn_sqr_add_words(r, a, a_len); |
238 | bn_add_words(r, r, tmp, r_len); | ||
239 | } | 242 | } |
240 | 243 | ||
241 | /* | 244 | /* |
@@ -246,24 +249,9 @@ bn_sqr_normal(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len, | |||
246 | int | 249 | int |
247 | bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx) | 250 | bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx) |
248 | { | 251 | { |
249 | BIGNUM *tmp; | 252 | bn_sqr_normal(r->d, r_len, a->d, a->top); |
250 | int ret = 0; | ||
251 | |||
252 | BN_CTX_start(ctx); | ||
253 | 253 | ||
254 | if ((tmp = BN_CTX_get(ctx)) == NULL) | 254 | return 1; |
255 | goto err; | ||
256 | if (!bn_wexpand(tmp, r_len)) | ||
257 | goto err; | ||
258 | |||
259 | bn_sqr_normal(r->d, r_len, a->d, a->top, tmp->d); | ||
260 | |||
261 | ret = 1; | ||
262 | |||
263 | err: | ||
264 | BN_CTX_end(ctx); | ||
265 | |||
266 | return ret; | ||
267 | } | 255 | } |
268 | #endif | 256 | #endif |
269 | 257 | ||