diff options
author | jsing <> | 2023-04-22 14:31:44 +0000 |
---|---|---|
committer | jsing <> | 2023-04-22 14:31:44 +0000 |
commit | 62512e9d0f52e2b7245e969b799ff5709af452bc (patch) | |
tree | cfab0ac02c72727c2556085ad9fc4cb1b67f7ad2 /src/lib | |
parent | 2afa30e89183897153850cee97c094af170d7743 (diff) | |
download | openbsd-62512e9d0f52e2b7245e969b799ff5709af452bc.tar.gz openbsd-62512e9d0f52e2b7245e969b799ff5709af452bc.tar.bz2 openbsd-62512e9d0f52e2b7245e969b799ff5709af452bc.zip |
Improve bn_montgomery_multiply_words()
Pull a number of invariants into variables, which avoids repeated loading
from memory on architectures where sufficient registers are available.
Also keep track of the per-iteration carry in a variable, rather than
unnecessarily reading from and writing to memory.
This gives a reasonable performance gain on some architectures (e.g. armv7).
Diffstat (limited to 'src/lib')
-rw-r--r-- | src/lib/libcrypto/bn/bn_mont.c | 25 |
1 file changed, 16 insertions, 9 deletions
diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c index b327b89792..26b9a6f64d 100644 --- a/src/lib/libcrypto/bn/bn_mont.c +++ b/src/lib/libcrypto/bn/bn_mont.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_mont.c,v 1.56 2023/04/07 23:03:32 tb Exp $ */ | 1 | /* $OpenBSD: bn_mont.c,v 1.57 2023/04/22 14:31:44 jsing Exp $ */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. | 3 | * All rights reserved. |
4 | * | 4 | * |
@@ -345,25 +345,32 @@ void | |||
345 | bn_montgomery_multiply_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | 345 | bn_montgomery_multiply_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, |
346 | const BN_ULONG *np, BN_ULONG *tp, BN_ULONG n0, int n_len) | 346 | const BN_ULONG *np, BN_ULONG *tp, BN_ULONG n0, int n_len) |
347 | { | 347 | { |
348 | BN_ULONG carry1, carry2, mask, w, x; | 348 | BN_ULONG a0, b, carry_a, carry_n, carry, mask, w, x; |
349 | int i, j; | 349 | int i, j; |
350 | 350 | ||
351 | for (i = 0; i <= n_len; i++) | 351 | carry_a = carry_n = carry = 0; |
352 | |||
353 | for (i = 0; i < n_len; i++) | ||
352 | tp[i] = 0; | 354 | tp[i] = 0; |
353 | 355 | ||
356 | a0 = ap[0]; | ||
357 | |||
354 | for (i = 0; i < n_len; i++) { | 358 | for (i = 0; i < n_len; i++) { |
355 | /* Compute new t[0] * n0, as we need it inside the loop. */ | 359 | b = bp[i]; |
356 | w = (ap[0] * bp[i] + tp[0]) * n0; | ||
357 | 360 | ||
358 | carry1 = carry2 = 0; | 361 | /* Compute new t[0] * n0, as we need it inside the loop. */ |
362 | w = (a0 * b + tp[0]) * n0; | ||
363 | |||
359 | for (j = 0; j < n_len; j++) { | 364 | for (j = 0; j < n_len; j++) { |
360 | bn_mulw_addw_addw(ap[j], bp[i], tp[j], carry1, &carry1, &x); | 365 | bn_mulw_addw_addw(ap[j], b, tp[j], carry_a, &carry_a, &x); |
361 | bn_mulw_addw_addw(np[j], w, x, carry2, &carry2, &tp[j]); | 366 | bn_mulw_addw_addw(np[j], w, x, carry_n, &carry_n, &tp[j]); |
362 | } | 367 | } |
363 | bn_addw_addw(carry1, carry2, tp[n_len], &tp[n_len + 1], &tp[n_len]); | 368 | bn_addw_addw(carry_a, carry_n, carry, &carry, &tp[n_len]); |
369 | carry_a = carry_n = 0; | ||
364 | 370 | ||
365 | tp++; | 371 | tp++; |
366 | } | 372 | } |
373 | tp[n_len] = carry; | ||
367 | 374 | ||
368 | /* | 375 | /* |
369 | * The output is now in the range of [0, 2N). Attempt to reduce once by | 376 | * The output is now in the range of [0, 2N). Attempt to reduce once by |