diff options
author | jsing <> | 2023-01-28 17:07:02 +0000 |
---|---|---|
committer | jsing <> | 2023-01-28 17:07:02 +0000 |
commit | 23c73f9e2fbea1b0512f9d998ae2fc29cd8d9b05 (patch) | |
tree | 0c4f887499bd8a562c3ad584aa9b480775aef7de /src | |
parent | cef0f410a8e14e34c70bebdafebd855f0a70c5af (diff) | |
download | openbsd-23c73f9e2fbea1b0512f9d998ae2fc29cd8d9b05.tar.gz openbsd-23c73f9e2fbea1b0512f9d998ae2fc29cd8d9b05.tar.bz2 openbsd-23c73f9e2fbea1b0512f9d998ae2fc29cd8d9b05.zip |
Move the more readable version of bn_mul_mont() from bn_asm.c to bn_mont.c.
Nothing actually uses this code, as OPENSSL_BN_ASM_MONT is not defined
unless there is an assembly implementation available (not to mention that
defining both OPENSSL_NO_ASM and OPENSSL_BN_ASM_MONT at the same time is
extra strange).
Discussed with tb@
Diffstat (limited to 'src')
-rw-r--r-- | src/lib/libcrypto/bn/bn_asm.c | 168 | ||||
-rw-r--r-- | src/lib/libcrypto/bn/bn_mont.c | 55 |
2 files changed, 55 insertions, 168 deletions
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c index e2b584ee85..bfdeabd9eb 100644 --- a/src/lib/libcrypto/bn/bn_asm.c +++ b/src/lib/libcrypto/bn/bn_asm.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_asm.c,v 1.23 2023/01/23 12:17:57 jsing Exp $ */ | 1 | /* $OpenBSD: bn_asm.c,v 1.24 2023/01/28 17:07:02 jsing Exp $ */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. | 3 | * All rights reserved. |
4 | * | 4 | * |
@@ -63,169 +63,3 @@ | |||
63 | 63 | ||
64 | #include "bn_local.h" | 64 | #include "bn_local.h" |
65 | 65 | ||
66 | #if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT) | ||
67 | |||
68 | #ifdef OPENSSL_NO_ASM | ||
69 | #ifdef OPENSSL_BN_ASM_MONT | ||
70 | /* | ||
71 | * This is essentially reference implementation, which may or may not | ||
72 | * result in performance improvement. E.g. on IA-32 this routine was | ||
73 | * observed to give 40% faster rsa1024 private key operations and 10% | ||
74 | * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only | ||
75 | * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a | ||
76 | * reference implementation, one to be used as starting point for | ||
77 | * platform-specific assembler. Mentioned numbers apply to compiler | ||
78 | * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and | ||
79 | * can vary not only from platform to platform, but even for compiler | ||
80 | * versions. Assembler vs. assembler improvement coefficients can | ||
81 | * [and are known to] differ and are to be documented elsewhere. | ||
82 | */ | ||
83 | int | ||
84 | bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0p, int num) | ||
85 | { | ||
86 | BN_ULONG c0, c1, ml, *tp, n0; | ||
87 | #ifdef mul64 | ||
88 | BN_ULONG mh; | ||
89 | #endif | ||
90 | int i = 0, j; | ||
91 | |||
92 | #if 0 /* template for platform-specific implementation */ | ||
93 | if (ap == bp) | ||
94 | return bn_sqr_mont(rp, ap, np, n0p, num); | ||
95 | #endif | ||
96 | tp = reallocarray(NULL, num + 2, sizeof(BN_ULONG)); | ||
97 | if (tp == NULL) | ||
98 | return 0; | ||
99 | |||
100 | n0 = *n0p; | ||
101 | |||
102 | c0 = 0; | ||
103 | ml = bp[0]; | ||
104 | #ifdef mul64 | ||
105 | mh = HBITS(ml); | ||
106 | ml = LBITS(ml); | ||
107 | for (j = 0; j < num; ++j) | ||
108 | mul(tp[j], ap[j], ml, mh, c0); | ||
109 | #else | ||
110 | for (j = 0; j < num; ++j) | ||
111 | mul(tp[j], ap[j], ml, c0); | ||
112 | #endif | ||
113 | |||
114 | tp[num] = c0; | ||
115 | tp[num + 1] = 0; | ||
116 | goto enter; | ||
117 | |||
118 | for (i = 0; i < num; i++) { | ||
119 | c0 = 0; | ||
120 | ml = bp[i]; | ||
121 | #ifdef mul64 | ||
122 | mh = HBITS(ml); | ||
123 | ml = LBITS(ml); | ||
124 | for (j = 0; j < num; ++j) | ||
125 | mul_add(tp[j], ap[j], ml, mh, c0); | ||
126 | #else | ||
127 | for (j = 0; j < num; ++j) | ||
128 | mul_add(tp[j], ap[j], ml, c0); | ||
129 | #endif | ||
130 | c1 = (tp[num] + c0) & BN_MASK2; | ||
131 | tp[num] = c1; | ||
132 | tp[num + 1] = (c1 < c0 ? 1 : 0); | ||
133 | enter: | ||
134 | c1 = tp[0]; | ||
135 | ml = (c1 * n0) & BN_MASK2; | ||
136 | c0 = 0; | ||
137 | #ifdef mul64 | ||
138 | mh = HBITS(ml); | ||
139 | ml = LBITS(ml); | ||
140 | mul_add(c1, np[0], ml, mh, c0); | ||
141 | #else | ||
142 | mul_add(c1, ml, np[0], c0); | ||
143 | #endif | ||
144 | for (j = 1; j < num; j++) { | ||
145 | c1 = tp[j]; | ||
146 | #ifdef mul64 | ||
147 | mul_add(c1, np[j], ml, mh, c0); | ||
148 | #else | ||
149 | mul_add(c1, ml, np[j], c0); | ||
150 | #endif | ||
151 | tp[j - 1] = c1 & BN_MASK2; | ||
152 | } | ||
153 | c1 = (tp[num] + c0) & BN_MASK2; | ||
154 | tp[num - 1] = c1; | ||
155 | tp[num] = tp[num + 1] + (c1 < c0 ? 1 : 0); | ||
156 | } | ||
157 | |||
158 | if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) { | ||
159 | c0 = bn_sub_words(rp, tp, np, num); | ||
160 | if (tp[num] != 0 || c0 == 0) { | ||
161 | goto out; | ||
162 | } | ||
163 | } | ||
164 | memcpy(rp, tp, num * sizeof(BN_ULONG)); | ||
165 | out: | ||
166 | freezero(tp, (num + 2) * sizeof(BN_ULONG)); | ||
167 | return 1; | ||
168 | } | ||
169 | #else | ||
170 | /* | ||
171 | * Return value of 0 indicates that multiplication/convolution was not | ||
172 | * performed to signal the caller to fall down to alternative/original | ||
173 | * code-path. | ||
174 | */ | ||
175 | int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num) | ||
176 | { return 0; | ||
177 | } | ||
178 | #endif /* OPENSSL_BN_ASM_MONT */ | ||
179 | #endif | ||
180 | |||
181 | #else /* !BN_MUL_COMBA */ | ||
182 | |||
183 | #ifdef OPENSSL_NO_ASM | ||
184 | #ifdef OPENSSL_BN_ASM_MONT | ||
185 | int | ||
186 | bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | ||
187 | const BN_ULONG *np, const BN_ULONG *n0p, int num) | ||
188 | { | ||
189 | BN_ULONG c0, c1, *tp, n0 = *n0p; | ||
190 | int i = 0, j; | ||
191 | |||
192 | tp = calloc(NULL, num + 2, sizeof(BN_ULONG)); | ||
193 | if (tp == NULL) | ||
194 | return 0; | ||
195 | |||
196 | for (i = 0; i < num; i++) { | ||
197 | c0 = bn_mul_add_words(tp, ap, num, bp[i]); | ||
198 | c1 = (tp[num] + c0) & BN_MASK2; | ||
199 | tp[num] = c1; | ||
200 | tp[num + 1] = (c1 < c0 ? 1 : 0); | ||
201 | |||
202 | c0 = bn_mul_add_words(tp, np, num, tp[0] * n0); | ||
203 | c1 = (tp[num] + c0) & BN_MASK2; | ||
204 | tp[num] = c1; | ||
205 | tp[num + 1] += (c1 < c0 ? 1 : 0); | ||
206 | for (j = 0; j <= num; j++) | ||
207 | tp[j] = tp[j + 1]; | ||
208 | } | ||
209 | |||
210 | if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) { | ||
211 | c0 = bn_sub_words(rp, tp, np, num); | ||
212 | if (tp[num] != 0 || c0 == 0) { | ||
213 | goto out; | ||
214 | } | ||
215 | } | ||
216 | memcpy(rp, tp, num * sizeof(BN_ULONG)); | ||
217 | out: | ||
218 | freezero(tp, (num + 2) * sizeof(BN_ULONG)); | ||
219 | return 1; | ||
220 | } | ||
221 | #else | ||
222 | int | ||
223 | bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | ||
224 | const BN_ULONG *np, const BN_ULONG *n0, int num) | ||
225 | { | ||
226 | return 0; | ||
227 | } | ||
228 | #endif /* OPENSSL_BN_ASM_MONT */ | ||
229 | #endif | ||
230 | |||
231 | #endif /* !BN_MUL_COMBA */ | ||
diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c index f8b870266c..8b364ff716 100644 --- a/src/lib/libcrypto/bn/bn_mont.c +++ b/src/lib/libcrypto/bn/bn_mont.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: bn_mont.c,v 1.33 2023/01/16 16:53:19 jsing Exp $ */ | 1 | /* $OpenBSD: bn_mont.c,v 1.34 2023/01/28 17:07:02 jsing Exp $ */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. | 3 | * All rights reserved. |
4 | * | 4 | * |
@@ -121,6 +121,59 @@ | |||
121 | 121 | ||
122 | #include "bn_local.h" | 122 | #include "bn_local.h" |
123 | 123 | ||
124 | #ifdef OPENSSL_NO_ASM | ||
125 | #ifdef OPENSSL_BN_ASM_MONT | ||
126 | int | ||
127 | bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | ||
128 | const BN_ULONG *np, const BN_ULONG *n0p, int num) | ||
129 | { | ||
130 | BN_ULONG c0, c1, *tp, n0 = *n0p; | ||
131 | int i = 0, j; | ||
132 | |||
133 | tp = calloc(NULL, num + 2, sizeof(BN_ULONG)); | ||
134 | if (tp == NULL) | ||
135 | return 0; | ||
136 | |||
137 | for (i = 0; i < num; i++) { | ||
138 | c0 = bn_mul_add_words(tp, ap, num, bp[i]); | ||
139 | c1 = (tp[num] + c0) & BN_MASK2; | ||
140 | tp[num] = c1; | ||
141 | tp[num + 1] = (c1 < c0 ? 1 : 0); | ||
142 | |||
143 | c0 = bn_mul_add_words(tp, np, num, tp[0] * n0); | ||
144 | c1 = (tp[num] + c0) & BN_MASK2; | ||
145 | tp[num] = c1; | ||
146 | tp[num + 1] += (c1 < c0 ? 1 : 0); | ||
147 | for (j = 0; j <= num; j++) | ||
148 | tp[j] = tp[j + 1]; | ||
149 | } | ||
150 | |||
151 | if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) { | ||
152 | c0 = bn_sub_words(rp, tp, np, num); | ||
153 | if (tp[num] != 0 || c0 == 0) { | ||
154 | goto out; | ||
155 | } | ||
156 | } | ||
157 | memcpy(rp, tp, num * sizeof(BN_ULONG)); | ||
158 | out: | ||
159 | freezero(tp, (num + 2) * sizeof(BN_ULONG)); | ||
160 | return 1; | ||
161 | } | ||
162 | #else /* !OPENSSL_BN_ASM_MONT */ | ||
163 | int | ||
164 | bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, | ||
165 | const BN_ULONG *np, const BN_ULONG *n0, int num) | ||
166 | { | ||
167 | /* | ||
168 | * Return value of 0 indicates that multiplication/convolution was not | ||
169 | * performed to signal the caller to fall down to alternative/original | ||
170 | * code-path. | ||
171 | */ | ||
172 | return 0; | ||
173 | } | ||
174 | #endif /* !OPENSSL_BN_ASM_MONT */ | ||
175 | #endif /* OPENSSL_NO_ASM */ | ||
176 | |||
124 | #define MONT_WORD /* use the faster word-based algorithm */ | 177 | #define MONT_WORD /* use the faster word-based algorithm */ |
125 | 178 | ||
126 | #ifdef MONT_WORD | 179 | #ifdef MONT_WORD |