summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjsing <>2023-01-28 17:07:02 +0000
committerjsing <>2023-01-28 17:07:02 +0000
commit23c73f9e2fbea1b0512f9d998ae2fc29cd8d9b05 (patch)
tree0c4f887499bd8a562c3ad584aa9b480775aef7de
parentcef0f410a8e14e34c70bebdafebd855f0a70c5af (diff)
downloadopenbsd-23c73f9e2fbea1b0512f9d998ae2fc29cd8d9b05.tar.gz
openbsd-23c73f9e2fbea1b0512f9d998ae2fc29cd8d9b05.tar.bz2
openbsd-23c73f9e2fbea1b0512f9d998ae2fc29cd8d9b05.zip
Move the more readable version of bn_mul_mont() from bn_asm.c to bn_mont.c.
Nothing actually uses this code, as OPENSSL_BN_ASM_MONT is not defined unless there is an assembly implementation available (not to mention that defining both OPENSSL_NO_ASM and OPENSSL_BN_ASM_MONT at the same time is extra strange). Discussed with tb@
-rw-r--r--src/lib/libcrypto/bn/bn_asm.c168
-rw-r--r--src/lib/libcrypto/bn/bn_mont.c55
2 files changed, 55 insertions, 168 deletions
diff --git a/src/lib/libcrypto/bn/bn_asm.c b/src/lib/libcrypto/bn/bn_asm.c
index e2b584ee85..bfdeabd9eb 100644
--- a/src/lib/libcrypto/bn/bn_asm.c
+++ b/src/lib/libcrypto/bn/bn_asm.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_asm.c,v 1.23 2023/01/23 12:17:57 jsing Exp $ */ 1/* $OpenBSD: bn_asm.c,v 1.24 2023/01/28 17:07:02 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -63,169 +63,3 @@
63 63
64#include "bn_local.h" 64#include "bn_local.h"
65 65
66#if defined(BN_MUL_COMBA) && !defined(OPENSSL_SMALL_FOOTPRINT)
67
68#ifdef OPENSSL_NO_ASM
69#ifdef OPENSSL_BN_ASM_MONT
70/*
71 * This is essentially a reference implementation, which may or may not
72 * result in performance improvement. E.g. on IA-32 this routine was
73 * observed to give 40% faster rsa1024 private key operations and 10%
74 * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only
75 * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a
76 * reference implementation, one to be used as starting point for
77 * platform-specific assembler. Mentioned numbers apply to compiler
78 * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and
79 * can vary not only from platform to platform, but even for compiler
80 * versions. Assembler vs. assembler improvement coefficients can
81 * [and are known to] differ and are to be documented elsewhere.
82 */
/*
 * Montgomery multiplication over num-word arrays, written with the mul()
 * and mul_add() macros (defined outside this block).
 *
 * rp receives num words; ap, bp and np are indexed from 0 to num - 1 and
 * n0p is dereferenced once. Returns 0 if the scratch buffer cannot be
 * allocated, otherwise 1.
 */
83int
84bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0p, int num)
85{
86 BN_ULONG c0, c1, ml, *tp, n0;
87#ifdef mul64
88 BN_ULONG mh;
89#endif
 /* i must start at 0: the goto below enters the loop body directly. */
90 int i = 0, j;
91
92#if 0 /* template for platform-specific implementation */
93 if (ap == bp)
94 return bn_sqr_mont(rp, ap, np, n0p, num);
95#endif
 /* Scratch buffer of num + 2 words; it is not cleared by this call. */
96 tp = reallocarray(NULL, num + 2, sizeof(BN_ULONG));
97 if (tp == NULL)
98 return 0;
99
100 n0 = *n0p;
101
 /*
  * First round: store (rather than accumulate) the products of ap[] and
  * bp[0] into tp[0..num - 1], so every word of the uncleared buffer that
  * is read later has been written. Presumably mul() leaves the running
  * carry in c0 - confirm against the macro definition.
  */
102 c0 = 0;
103 ml = bp[0];
104#ifdef mul64
105 mh = HBITS(ml);
106 ml = LBITS(ml);
107 for (j = 0; j < num; ++j)
108 mul(tp[j], ap[j], ml, mh, c0);
109#else
110 for (j = 0; j < num; ++j)
111 mul(tp[j], ap[j], ml, c0);
112#endif
113
114 tp[num] = c0;
115 tp[num + 1] = 0;
 /*
  * Skip the multiply-accumulate half of the loop body for i == 0, since
  * the pass above already produced that round's products.
  */
116 goto enter;
117
118 for (i = 0; i < num; i++) {
 /* Accumulate the products of ap[] and bp[i] into tp[0..num - 1]. */
119 c0 = 0;
120 ml = bp[i];
121#ifdef mul64
122 mh = HBITS(ml);
123 ml = LBITS(ml);
124 for (j = 0; j < num; ++j)
125 mul_add(tp[j], ap[j], ml, mh, c0);
126#else
127 for (j = 0; j < num; ++j)
128 mul_add(tp[j], ap[j], ml, c0);
129#endif
 /* Fold the carry into tp[num]; tp[num + 1] records wrap-around. */
130 c1 = (tp[num] + c0) & BN_MASK2;
131 tp[num] = c1;
132 tp[num + 1] = (c1 < c0 ? 1 : 0);
133enter:
 /*
  * Reduction half: the multiplier is tp[0] * n0. For word 0 only the
  * carry c0 is kept; the updated c1 is not stored back.
  */
134 c1 = tp[0];
135 ml = (c1 * n0) & BN_MASK2;
136 c0 = 0;
137#ifdef mul64
138 mh = HBITS(ml);
139 ml = LBITS(ml);
140 mul_add(c1, np[0], ml, mh, c0);
141#else
142 mul_add(c1, ml, np[0], c0);
143#endif
 /* Remaining words are written one index lower, shifting tp down. */
144 for (j = 1; j < num; j++) {
145 c1 = tp[j];
146#ifdef mul64
147 mul_add(c1, np[j], ml, mh, c0);
148#else
149 mul_add(c1, ml, np[j], c0);
150#endif
151 tp[j - 1] = c1 & BN_MASK2;
152 }
 /* The two top words move down one slot along with the final carry. */
153 c1 = (tp[num] + c0) & BN_MASK2;
154 tp[num - 1] = c1;
155 tp[num] = tp[num + 1] + (c1 < c0 ? 1 : 0);
156 }
157
 /*
  * Final step: when the top word is set or the leading word is not below
  * np's, bn_sub_words() writes its result straight into rp. That result
  * is kept if tp[num] is non-zero or the call returned 0 (presumably "no
  * borrow" - confirm); otherwise tp is copied to rp as is.
  */
158 if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) {
159 c0 = bn_sub_words(rp, tp, np, num);
160 if (tp[num] != 0 || c0 == 0) {
161 goto out;
162 }
163 }
164 memcpy(rp, tp, num * sizeof(BN_ULONG));
165out:
 /* freezero() is given the full buffer size, num + 2 words. */
166 freezero(tp, (num + 2) * sizeof(BN_ULONG));
167 return 1;
168}
169#else
170/*
171 * Return value of 0 indicates that multiplication/convolution was not
172 * performed to signal the caller to fall back to the alternative/original
173 * code-path.
174 */
175int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0, int num)
176 { return 0;
177}
178#endif /* OPENSSL_BN_ASM_MONT */
179#endif
180
181#else /* !BN_MUL_COMBA */
182
183#ifdef OPENSSL_NO_ASM
184#ifdef OPENSSL_BN_ASM_MONT
185int
186bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
187 const BN_ULONG *np, const BN_ULONG *n0p, int num)
188{
189 BN_ULONG c0, c1, *tp, n0 = *n0p;
190 int i = 0, j;
191
192 tp = calloc(NULL, num + 2, sizeof(BN_ULONG));
193 if (tp == NULL)
194 return 0;
195
196 for (i = 0; i < num; i++) {
197 c0 = bn_mul_add_words(tp, ap, num, bp[i]);
198 c1 = (tp[num] + c0) & BN_MASK2;
199 tp[num] = c1;
200 tp[num + 1] = (c1 < c0 ? 1 : 0);
201
202 c0 = bn_mul_add_words(tp, np, num, tp[0] * n0);
203 c1 = (tp[num] + c0) & BN_MASK2;
204 tp[num] = c1;
205 tp[num + 1] += (c1 < c0 ? 1 : 0);
206 for (j = 0; j <= num; j++)
207 tp[j] = tp[j + 1];
208 }
209
210 if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) {
211 c0 = bn_sub_words(rp, tp, np, num);
212 if (tp[num] != 0 || c0 == 0) {
213 goto out;
214 }
215 }
216 memcpy(rp, tp, num * sizeof(BN_ULONG));
217out:
218 freezero(tp, (num + 2) * sizeof(BN_ULONG));
219 return 1;
220}
221#else
222int
223bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
224 const BN_ULONG *np, const BN_ULONG *n0, int num)
225{
226 return 0;
227}
228#endif /* OPENSSL_BN_ASM_MONT */
229#endif
230
231#endif /* !BN_MUL_COMBA */
diff --git a/src/lib/libcrypto/bn/bn_mont.c b/src/lib/libcrypto/bn/bn_mont.c
index f8b870266c..8b364ff716 100644
--- a/src/lib/libcrypto/bn/bn_mont.c
+++ b/src/lib/libcrypto/bn/bn_mont.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: bn_mont.c,v 1.33 2023/01/16 16:53:19 jsing Exp $ */ 1/* $OpenBSD: bn_mont.c,v 1.34 2023/01/28 17:07:02 jsing Exp $ */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved. 3 * All rights reserved.
4 * 4 *
@@ -121,6 +121,59 @@
121 121
122#include "bn_local.h" 122#include "bn_local.h"
123 123
124#ifdef OPENSSL_NO_ASM
125#ifdef OPENSSL_BN_ASM_MONT
126int
127bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
128 const BN_ULONG *np, const BN_ULONG *n0p, int num)
129{
130 BN_ULONG c0, c1, *tp, n0 = *n0p;
131 int i = 0, j;
132
133 tp = calloc(NULL, num + 2, sizeof(BN_ULONG));
134 if (tp == NULL)
135 return 0;
136
137 for (i = 0; i < num; i++) {
138 c0 = bn_mul_add_words(tp, ap, num, bp[i]);
139 c1 = (tp[num] + c0) & BN_MASK2;
140 tp[num] = c1;
141 tp[num + 1] = (c1 < c0 ? 1 : 0);
142
143 c0 = bn_mul_add_words(tp, np, num, tp[0] * n0);
144 c1 = (tp[num] + c0) & BN_MASK2;
145 tp[num] = c1;
146 tp[num + 1] += (c1 < c0 ? 1 : 0);
147 for (j = 0; j <= num; j++)
148 tp[j] = tp[j + 1];
149 }
150
151 if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) {
152 c0 = bn_sub_words(rp, tp, np, num);
153 if (tp[num] != 0 || c0 == 0) {
154 goto out;
155 }
156 }
157 memcpy(rp, tp, num * sizeof(BN_ULONG));
158out:
159 freezero(tp, (num + 2) * sizeof(BN_ULONG));
160 return 1;
161}
162#else /* !OPENSSL_BN_ASM_MONT */
163int
164bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
165 const BN_ULONG *np, const BN_ULONG *n0, int num)
166{
167 /*
168 * Return value of 0 indicates that multiplication/convolution was not
169 * performed to signal the caller to fall down to alternative/original
170 * code-path.
171 */
172 return 0;
173}
174#endif /* !OPENSSL_BN_ASM_MONT */
175#endif /* OPENSSL_NO_ASM */
176
124#define MONT_WORD /* use the faster word-based algorithm */ 177#define MONT_WORD /* use the faster word-based algorithm */
125 178
126#ifdef MONT_WORD 179#ifdef MONT_WORD