diff options
| author | jsing <> | 2023-01-28 17:07:02 +0000 |
|---|---|---|
| committer | jsing <> | 2023-01-28 17:07:02 +0000 |
| commit | 5771cd5b92eba7bf6bd7dd6b8768d00f6b574509 (patch) | |
| tree | 0c4f887499bd8a562c3ad584aa9b480775aef7de | |
| parent | 51f6bf54ed0ca417a8e838b9045b90fcfd53bcf5 (diff) | |
| download | openbsd-5771cd5b92eba7bf6bd7dd6b8768d00f6b574509.tar.gz openbsd-5771cd5b92eba7bf6bd7dd6b8768d00f6b574509.tar.bz2 openbsd-5771cd5b92eba7bf6bd7dd6b8768d00f6b574509.zip | |
Move the more readable version of bn_mul_mont() from bn_asm.c to bn_mont.c.
Nothing actually uses this code, as OPENSSL_BN_ASM_MONT is not defined
unless there is an assembly implementation available (not to mention that
defining both OPENSSL_NO_ASM and OPENSSL_BN_ASM_MONT at the same time is
extra strange).
Discussed with tb@
Diffstat (limited to '')
| -rw-r--r-- | src/lib/libcrypto/bn/bn_asm.c | 168 | ||||
| -rw-r--r-- | src/lib/libcrypto/bn/bn_mont.c | 55 |
2 files changed, 55 insertions, 168 deletions
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c index e2b584ee85..bfdeabd9eb 100644 --- a/src/lib/libcrypto/bn/bn_asm.c +++ b/src/lib/libcrypto/bn/bn_asm.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_asm.c,v 1.23 2023/01/23 12:17:57 jsing Exp $ */ | 1 | /* $OpenBSD: bn_asm.c,v 1.24 2023/01/28 17:07:02 jsing Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -63,169 +63,3 @@ | |||
| 63 | 63 | ||
| 64 | #include "bn_local.h" | 64 | #include "bn_local.h" |
| 65 | 65 | ||
| 66 | #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT) | ||
| 67 | |||
| 68 | #ifdef OPENSSL_NO_ASM | ||
| 69 | #ifdef OPENSSL_BN_ASM_MONT | ||
| 70 | /* | ||
| 71 | * This is essentially a reference implementation, which may or may not | ||
| 72 | * result in a performance improvement. E.g. on IA-32 this routine was | ||
| 73 | * observed to give 40% faster rsa1024 private key operations and 10% | ||
| 74 | * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only | ||
| 75 | * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a | ||
| 76 | * reference implementation, one to be used as a starting point for | ||
| 77 | * platform-specific assembler. The numbers mentioned apply to compiler | ||
| 78 | * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and | ||
| 79 | * can vary not only from platform to platform, but even between compiler | ||
| 80 | * versions. Assembler vs. assembler improvement coefficients can | ||
| 81 | * [and are known to] differ and are to be documented elsewhere. | ||
| 82 | * Returns 1 once rp holds the num-word result, or 0 if the scratch buffer cannot be allocated. */ | ||
| 83 | int | ||
| 84 | bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0p, int num) | ||
| 85 | { | ||
| 86 | BN_ULONG c0, c1, ml, *tp, n0; | ||
| 87 | #ifdef mul64 | ||
| 88 | BN_ULONG mh; | ||
| 89 | #endif | ||
| 90 | int i = 0, j; /* i must be 0 here: the first pass jumps into the loop body without running the for-initialiser */ | ||
| 91 | | ||
| 92 | #if 0 /* template for platform-specific implementation */ | ||
| 93 | if (ap == bp) | ||
| 94 | return bn_sqr_mont(rp, ap, np, n0p, num); | ||
| 95 | #endif | ||
| 96 | tp = reallocarray(NULL, num + 2, sizeof(BN_ULONG)); /* scratch buffer of num + 2 words; not zeroed, every word is written below before it is read */ | ||
| 97 | if (tp == NULL) | ||
| 98 | return 0; | ||
| 99 | | ||
| 100 | n0 = *n0p; | ||
| 101 | | ||
| 102 | c0 = 0; | ||
| 103 | ml = bp[0]; /* first pass: tp[0..num-1] is filled from ap and bp[0]; mul() is a macro defined elsewhere, presumably leaving its carry in c0 */ | ||
| 104 | #ifdef mul64 | ||
| 105 | mh = HBITS(ml); | ||
| 106 | ml = LBITS(ml); | ||
| 107 | for (j = 0; j < num; ++j) | ||
| 108 | mul(tp[j], ap[j], ml, mh, c0); | ||
| 109 | #else | ||
| 110 | for (j = 0; j < num; ++j) | ||
| 111 | mul(tp[j], ap[j], ml, c0); | ||
| 112 | #endif | ||
| 113 | | ||
| 114 | tp[num] = c0; /* c0 as left by the loop above */ | ||
| 115 | tp[num + 1] = 0; | ||
| 116 | goto enter; /* tp already holds the bp[0] pass, so skip the mul_add step and join the loop at the np step */ | ||
| 117 | | ||
| 118 | for (i = 0; i < num; i++) { /* only reached through the "enter" label on the first pass, with i == 0 */ | ||
| 119 | c0 = 0; | ||
| 120 | ml = bp[i]; | ||
| 121 | #ifdef mul64 | ||
| 122 | mh = HBITS(ml); | ||
| 123 | ml = LBITS(ml); | ||
| 124 | for (j = 0; j < num; ++j) | ||
| 125 | mul_add(tp[j], ap[j], ml, mh, c0); | ||
| 126 | #else | ||
| 127 | for (j = 0; j < num; ++j) | ||
| 128 | mul_add(tp[j], ap[j], ml, c0); | ||
| 129 | #endif | ||
| 130 | c1 = (tp[num] + c0) & BN_MASK2; | ||
| 131 | tp[num] = c1; | ||
| 132 | tp[num + 1] = (c1 < c0 ? 1 : 0); /* c1 < c0 means the addition above wrapped */ | ||
| 133 | enter: | ||
| 134 | c1 = tp[0]; | ||
| 135 | ml = (c1 * n0) & BN_MASK2; /* multiplier applied to np in this pass: tp[0] * n0, masked with BN_MASK2 */ | ||
| 136 | c0 = 0; | ||
| 137 | #ifdef mul64 | ||
| 138 | mh = HBITS(ml); | ||
| 139 | ml = LBITS(ml); | ||
| 140 | mul_add(c1, np[0], ml, mh, c0); | ||
| 141 | #else | ||
| 142 | mul_add(c1, ml, np[0], c0); | ||
| 143 | #endif | ||
| 144 | for (j = 1; j < num; j++) { | ||
| 145 | c1 = tp[j]; | ||
| 146 | #ifdef mul64 | ||
| 147 | mul_add(c1, np[j], ml, mh, c0); | ||
| 148 | #else | ||
| 149 | mul_add(c1, ml, np[j], c0); | ||
| 150 | #endif | ||
| 151 | tp[j - 1] = c1 & BN_MASK2; /* stored one index lower, so tp moves down a word each pass; the j == 0 word is not stored */ | ||
| 152 | } | ||
| 153 | c1 = (tp[num] + c0) & BN_MASK2; | ||
| 154 | tp[num - 1] = c1; | ||
| 155 | tp[num] = tp[num + 1] + (c1 < c0 ? 1 : 0); /* fold the saved top word and this pass's wrap into the new top word */ | ||
| 156 | } | ||
| 157 | | ||
| 158 | if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) { /* top word set, or the highest words indicate tp may be >= np */ | ||
| 159 | c0 = bn_sub_words(rp, tp, np, num); | ||
| 160 | if (tp[num] != 0 || c0 == 0) { /* keep what bn_sub_words() wrote to rp; c0 is presumably its borrow */ | ||
| 161 | goto out; | ||
| 162 | } | ||
| 163 | } | ||
| 164 | memcpy(rp, tp, num * sizeof(BN_ULONG)); /* otherwise the low num words of tp are the result, overwriting anything already put in rp */ | ||
| 165 | out: | ||
| 166 | freezero(tp, (num + 2) * sizeof(BN_ULONG)); | ||
| 167 | return 1; | ||
| 168 | } | ||
| 169 | #else | ||
| 170 | /* | ||
| 171 | * A return value of 0 indicates that the multiplication/convolution was | ||
| 172 | * not performed, signalling the caller to fall back to the alternative/original | ||
| 173 | * code-path. | ||
| 174 | */ | ||
| 175 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num) | ||
| 176 | { return 0; /* stub: no argument is read and rp is left untouched */ | ||
| 177 | } | ||
| 178 | #endif /* OPENSSL_BN_ASM_MONT */ | ||
| 179 | #endif | ||
| 180 | |||
| 181 | #else /* !BN_MUL_COMBA */ | ||
| 182 | |||
| 183 | #ifdef OPENSSL_NO_ASM | ||
| 184 | #ifdef OPENSSL_BN_ASM_MONT | ||
| 185 | int | ||
| 186 | bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | ||
| 187 | const BN_ULONG *np, const BN_ULONG *n0p, int num) | ||
| 188 | { | ||
| 189 | BN_ULONG c0, c1, *tp, n0 = *n0p; | ||
| 190 | int i = 0, j; | ||
| 191 | /* Returns 1 once rp holds the num-word result, or 0 if the scratch buffer cannot be allocated. */ | ||
| 192 | tp = calloc(num + 2, sizeof(BN_ULONG)); /* calloc() takes (nmemb, size); tp[num] is read before it is written, so the buffer must start zeroed */ | ||
| 193 | if (tp == NULL) | ||
| 194 | return 0; | ||
| 195 | /* One pass per word of bp; bn_mul_add_words() is defined elsewhere and presumably accumulates into tp and returns the carry word. */ | ||
| 196 | for (i = 0; i < num; i++) { | ||
| 197 | c0 = bn_mul_add_words(tp, ap, num, bp[i]); | ||
| 198 | c1 = (tp[num] + c0) & BN_MASK2; | ||
| 199 | tp[num] = c1; | ||
| 200 | tp[num + 1] = (c1 < c0 ? 1 : 0); /* c1 < c0 means the addition above wrapped */ | ||
| 201 | /* Second accumulation of the pass: np with multiplier tp[0] * n0. */ | ||
| 202 | c0 = bn_mul_add_words(tp, np, num, tp[0] * n0); | ||
| 203 | c1 = (tp[num] + c0) & BN_MASK2; | ||
| 204 | tp[num] = c1; | ||
| 205 | tp[num + 1] += (c1 < c0 ? 1 : 0); | ||
| 206 | for (j = 0; j <= num; j++) /* move tp down by one word; tp[num + 1] is reset at the top of the next pass */ | ||
| 207 | tp[j] = tp[j + 1]; | ||
| 208 | } | ||
| 209 | /* Subtract np once when the top word is set or the highest words indicate tp may be >= np. */ | ||
| 210 | if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) { | ||
| 211 | c0 = bn_sub_words(rp, tp, np, num); | ||
| 212 | if (tp[num] != 0 || c0 == 0) { /* keep what bn_sub_words() wrote to rp; c0 is presumably its borrow */ | ||
| 213 | goto out; | ||
| 214 | } | ||
| 215 | } | ||
| 216 | memcpy(rp, tp, num * sizeof(BN_ULONG)); /* otherwise the low num words of tp are the result */ | ||
| 217 | out: | ||
| 218 | freezero(tp, (num + 2) * sizeof(BN_ULONG)); | ||
| 219 | return 1; | ||
| 220 | } | ||
| 221 | #else | ||
| 222 | int | ||
| 223 | bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | ||
| 224 | const BN_ULONG *np, const BN_ULONG *n0, int num) | ||
| 225 | { | ||
| 226 | return 0; /* stub: always 0; no argument is read and rp is left untouched */ | ||
| 227 | } | ||
| 228 | #endif /* OPENSSL_BN_ASM_MONT */ | ||
| 229 | #endif | ||
| 230 | |||
| 231 | #endif /* !BN_MUL_COMBA */ | ||
diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c index f8b870266c..8b364ff716 100644 --- a/src/lib/libcrypto/bn/bn_mont.c +++ b/src/lib/libcrypto/bn/bn_mont.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | /* $OpenBSD: bn_mont.c,v 1.33 2023/01/16 16:53:19 jsing Exp $ */ | 1 | /* $OpenBSD: bn_mont.c,v 1.34 2023/01/28 17:07:02 jsing Exp $ */ |
| 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
| 3 | * All rights reserved. | 3 | * All rights reserved. |
| 4 | * | 4 | * |
| @@ -121,6 +121,59 @@ | |||
| 121 | 121 | ||
| 122 | #include "bn_local.h" | 122 | #include "bn_local.h" |
| 123 | 123 | ||
| 124 | #ifdef OPENSSL_NO_ASM | ||
| 125 | #ifdef OPENSSL_BN_ASM_MONT | ||
| 126 | | 126 | int | ||
| 127 | | 127 | bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | ||
| 128 | | 128 | const BN_ULONG *np, const BN_ULONG *n0p, int num) | ||
| 129 | | 129 | { | ||
| 130 | | 130 | BN_ULONG c0, c1, *tp, n0 = *n0p; | ||
| 131 | | 131 | int i = 0, j; | ||
| 132 | | 132 | /* Returns 1 once rp holds the num-word result, or 0 if the scratch buffer cannot be allocated. */ | ||
| 133 | | 133 | tp = calloc(num + 2, sizeof(BN_ULONG)); /* calloc() takes (nmemb, size); tp[num] is read before it is written, so the buffer must start zeroed */ | ||
| 134 | | 134 | if (tp == NULL) | ||
| 135 | | 135 | return 0; | ||
| 136 | | 136 | /* One pass per word of bp; bn_mul_add_words() is defined elsewhere and presumably accumulates into tp and returns the carry word. */ | ||
| 137 | | 137 | for (i = 0; i < num; i++) { | ||
| 138 | | 138 | c0 = bn_mul_add_words(tp, ap, num, bp[i]); | ||
| 139 | | 139 | c1 = (tp[num] + c0) & BN_MASK2; | ||
| 140 | | 140 | tp[num] = c1; | ||
| 141 | | 141 | tp[num + 1] = (c1 < c0 ? 1 : 0); /* c1 < c0 means the addition above wrapped */ | ||
| 142 | | 142 | /* Second accumulation of the pass: np with multiplier tp[0] * n0. */ | ||
| 143 | | 143 | c0 = bn_mul_add_words(tp, np, num, tp[0] * n0); | ||
| 144 | | 144 | c1 = (tp[num] + c0) & BN_MASK2; | ||
| 145 | | 145 | tp[num] = c1; | ||
| 146 | | 146 | tp[num + 1] += (c1 < c0 ? 1 : 0); | ||
| 147 | | 147 | for (j = 0; j <= num; j++) /* move tp down by one word; tp[num + 1] is reset at the top of the next pass */ | ||
| 148 | | 148 | tp[j] = tp[j + 1]; | ||
| 149 | | 149 | } | ||
| 150 | | 150 | /* Subtract np once when the top word is set or the highest words indicate tp may be >= np. */ | ||
| 151 | | 151 | if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) { | ||
| 152 | | 152 | c0 = bn_sub_words(rp, tp, np, num); | ||
| 153 | | 153 | if (tp[num] != 0 || c0 == 0) { /* keep what bn_sub_words() wrote to rp; c0 is presumably its borrow */ | ||
| 154 | | 154 | goto out; | ||
| 155 | | 155 | } | ||
| 156 | | 156 | } | ||
| 157 | | 157 | memcpy(rp, tp, num * sizeof(BN_ULONG)); /* otherwise the low num words of tp are the result */ | ||
| 158 | | 158 | out: | ||
| 159 | | 159 | freezero(tp, (num + 2) * sizeof(BN_ULONG)); | ||
| 160 | | 160 | return 1; | ||
| 161 | | 161 | } | ||
| 162 | #else /* !OPENSSL_BN_ASM_MONT */ | ||
| 163 | | 163 | int | ||
| 164 | | 164 | bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | ||
| 165 | | 165 | const BN_ULONG *np, const BN_ULONG *n0, int num) | ||
| 166 | | 166 | { | ||
| 167 | | 167 | /* | ||
| 168 | | 168 | * A return value of 0 indicates that the multiplication/convolution was | ||
| 169 | | 169 | * not performed, signalling the caller to fall back to the alternative/original | ||
| 170 | | 170 | * code-path. This stub reads none of its arguments and leaves rp untouched. | ||
| 171 | | 171 | */ | ||
| 172 | | 172 | return 0; | ||
| 173 | | 173 | } | ||
| 174 | #endif /* !OPENSSL_BN_ASM_MONT */ | ||
| 175 | #endif /* OPENSSL_NO_ASM */ | ||
| 176 | |||
| 124 | #define MONT_WORD /* use the faster word-based algorithm */ | 177 | #define MONT_WORD /* use the faster word-based algorithm */ |
| 125 | 178 | ||
| 126 | #ifdef MONT_WORD | 179 | #ifdef MONT_WORD |
