author	jsing <>	2023-05-12 10:10:55 +0000
committer	jsing <>	2023-05-12 10:10:55 +0000
commit	2dc9b70a8ac534811265e7db816fc1f998b8c590 (patch)
tree	25dc8d0d7e0a5fa5ec348c5791ab2db5c1aabc7d /src/lib
parent	1f7f935f99968dab525b2116ad7a93160de45c81 (diff)
Reduce the number of SHA-512 C implementations from three to one.
We currently have three C implementations of SHA-512: a version optimised for CPUs with few registers (specifically i386), a regular implementation and a semi-unrolled implementation. In testing on a ~15 year old i386 CPU, the fastest version is actually the semi-unrolled one (not to mention that we still currently have an i586 assembly implementation that is used on i386 instead...). More capable architectures do not seem to care whether the regular or the semi-unrolled version is used, presumably because they end up doing effectively the same thing in hardware during execution.

Remove all except the semi-unrolled version.

ok tb@
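To make the distinction concrete, the sketch below contrasts the two looping styles on a made-up round function; it is not the libcrypto code. The removed "regular" style rotates the working variables with assignments at the end of every round, while the kept "semi-unrolled" style unrolls a group of rounds and rotates the variable names in the macro arguments instead, so no per-round data movement is needed. TOY_ROUND, toy_rolled and toy_semi_unrolled are illustrative names invented for this sketch.

/*
 * Illustrative only: a toy round with the same data-flow shape as a
 * SHA-512 round (it writes only the "d" and "h" slots), used to compare
 * the rolled and semi-unrolled looping styles.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define TOY_ROUND(i, a, b, c, d, e, f, g, h) do {			\
	uint64_t T1 = (h) + ((e) ^ (f) ^ (g)) + (uint64_t)(i);		\
	uint64_t T2 = ((a) & (b)) | (c);				\
	(d) += T1;							\
	(h) = T1 + T2;							\
} while (0)

/* Rolled style: shuffle every working variable at the end of each round. */
static void
toy_rolled(uint64_t s[8])
{
	uint64_t a = s[0], b = s[1], c = s[2], d = s[3];
	uint64_t e = s[4], f = s[5], g = s[6], h = s[7];
	uint64_t T1, T2;
	int i;

	for (i = 0; i < 8; i++) {
		T1 = h + (e ^ f ^ g) + (uint64_t)i;
		T2 = (a & b) | c;
		h = g;
		g = f;
		f = e;
		e = d + T1;
		d = c;
		c = b;
		b = a;
		a = T1 + T2;
	}
	s[0] = a; s[1] = b; s[2] = c; s[3] = d;
	s[4] = e; s[5] = f; s[6] = g; s[7] = h;
}

/* Semi-unrolled style: rotate the roles by renaming the macro arguments. */
static void
toy_semi_unrolled(uint64_t s[8])
{
	uint64_t a = s[0], b = s[1], c = s[2], d = s[3];
	uint64_t e = s[4], f = s[5], g = s[6], h = s[7];

	TOY_ROUND(0, a, b, c, d, e, f, g, h);
	TOY_ROUND(1, h, a, b, c, d, e, f, g);
	TOY_ROUND(2, g, h, a, b, c, d, e, f);
	TOY_ROUND(3, f, g, h, a, b, c, d, e);
	TOY_ROUND(4, e, f, g, h, a, b, c, d);
	TOY_ROUND(5, d, e, f, g, h, a, b, c);
	TOY_ROUND(6, c, d, e, f, g, h, a, b);
	TOY_ROUND(7, b, c, d, e, f, g, h, a);

	s[0] = a; s[1] = b; s[2] = c; s[3] = d;
	s[4] = e; s[5] = f; s[6] = g; s[7] = h;
}

int
main(void)
{
	uint64_t x[8], y[8];
	int i;

	for (i = 0; i < 8; i++)
		x[i] = y[i] = 0x0123456789abcdefULL + (uint64_t)i;

	toy_rolled(x);
	toy_semi_unrolled(y);

	printf("states %s\n", memcmp(x, y, sizeof(x)) == 0 ?
	    "match" : "differ");
	return 0;
}

Both helpers produce identical output for any starting state; that equivalence is what lets a SHA-2 implementation pick whichever loop shape the target CPU prefers.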
Diffstat (limited to 'src/lib')
-rw-r--r--	src/lib/libcrypto/sha/sha512.c	135
1 file changed, 1 insertion(+), 134 deletions(-)
diff --git a/src/lib/libcrypto/sha/sha512.c b/src/lib/libcrypto/sha/sha512.c
index 4a4194350b..2840fa9446 100644
--- a/src/lib/libcrypto/sha/sha512.c
+++ b/src/lib/libcrypto/sha/sha512.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: sha512.c,v 1.34 2023/04/14 10:45:15 jsing Exp $ */
+/* $OpenBSD: sha512.c,v 1.35 2023/05/12 10:10:55 jsing Exp $ */
 /* ====================================================================
  * Copyright (c) 1998-2011 The OpenSSL Project. All rights reserved.
  *
@@ -153,137 +153,6 @@ static const SHA_LONG64 K512[80] = {
 #define Ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z)))
 #define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
 
-
-#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
-/*
- * This code should give better results on 32-bit CPU with less than
- * ~24 registers, both size and performance wise...
- */
-static void
-sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num)
-{
-	const SHA_LONG64 *W = in;
-	SHA_LONG64 A, E, T;
-	SHA_LONG64 X[9 + 80], *F;
-	int i;
-
-	while (num--) {
-
-		F = X + 80;
-		A = ctx->h[0];
-		F[1] = ctx->h[1];
-		F[2] = ctx->h[2];
-		F[3] = ctx->h[3];
-		E = ctx->h[4];
-		F[5] = ctx->h[5];
-		F[6] = ctx->h[6];
-		F[7] = ctx->h[7];
-
-		for (i = 0; i < 16; i++, F--) {
-			T = PULL64(W[i]);
-			F[0] = A;
-			F[4] = E;
-			F[8] = T;
-			T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
-			E = F[3] + T;
-			A = T + Sigma0(A) + Maj(A, F[1], F[2]);
-		}
-
-		for (; i < 80; i++, F--) {
-			T = sigma0(F[8 + 16 - 1]);
-			T += sigma1(F[8 + 16 - 14]);
-			T += F[8 + 16] + F[8 + 16 - 9];
-
-			F[0] = A;
-			F[4] = E;
-			F[8] = T;
-			T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i];
-			E = F[3] + T;
-			A = T + Sigma0(A) + Maj(A, F[1], F[2]);
-		}
-
-		ctx->h[0] += A;
-		ctx->h[1] += F[1];
-		ctx->h[2] += F[2];
-		ctx->h[3] += F[3];
-		ctx->h[4] += E;
-		ctx->h[5] += F[5];
-		ctx->h[6] += F[6];
-		ctx->h[7] += F[7];
-
-		W += SHA_LBLOCK;
-	}
-}
-
-#elif defined(OPENSSL_SMALL_FOOTPRINT)
-
-static void
-sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num)
-{
-	const SHA_LONG64 *W = in;
-	SHA_LONG64 a, b, c, d, e, f, g, h, s0, s1, T1, T2;
-	SHA_LONG64 X[16];
-	int i;
-
-	while (num--) {
-
-		a = ctx->h[0];
-		b = ctx->h[1];
-		c = ctx->h[2];
-		d = ctx->h[3];
-		e = ctx->h[4];
-		f = ctx->h[5];
-		g = ctx->h[6];
-		h = ctx->h[7];
-
-		for (i = 0; i < 16; i++) {
-			T1 = X[i] = PULL64(W[i]);
-			T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
-			T2 = Sigma0(a) + Maj(a, b, c);
-			h = g;
-			g = f;
-			f = e;
-			e = d + T1;
-			d = c;
-			c = b;
-			b = a;
-			a = T1 + T2;
-		}
-
-		for (; i < 80; i++) {
-			s0 = X[(i + 1)&0x0f];
-			s0 = sigma0(s0);
-			s1 = X[(i + 14)&0x0f];
-			s1 = sigma1(s1);
-
-			T1 = X[i&0xf] += s0 + s1 + X[(i + 9)&0xf];
-			T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i];
-			T2 = Sigma0(a) + Maj(a, b, c);
-			h = g;
-			g = f;
-			f = e;
-			e = d + T1;
-			d = c;
-			c = b;
-			b = a;
-			a = T1 + T2;
-		}
-
-		ctx->h[0] += a;
-		ctx->h[1] += b;
-		ctx->h[2] += c;
-		ctx->h[3] += d;
-		ctx->h[4] += e;
-		ctx->h[5] += f;
-		ctx->h[6] += g;
-		ctx->h[7] += h;
-
-		W += SHA_LBLOCK;
-	}
-}
-
-#else
-
 #define ROUND_00_15(i, a, b, c, d, e, f, g, h) do { \
 	T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i]; \
 	h = Sigma0(a) + Maj(a, b, c); \
@@ -379,8 +248,6 @@ sha512_block_data_order(SHA512_CTX *ctx, const void *in, size_t num)
 	}
 }
 
-#endif
-
 #endif /* SHA512_ASM */
 
 int
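Since only one C implementation remains after this change, a quick way to confirm it still produces correct digests is a known-answer test against the FIPS 180-2 "abc" vector. The program below is a sketch and not part of the commit; it assumes only the public SHA512() one-shot interface from <openssl/sha.h>, built with something like "cc sha512_kat.c -lcrypto".

/*
 * Illustrative known-answer test: checks SHA-512 against the
 * FIPS 180-2 "abc" test vector.
 */
#include <stdio.h>
#include <string.h>

#include <openssl/sha.h>

int
main(void)
{
	const char *msg = "abc";
	const char *want =
	    "ddaf35a193617abacc417349ae20413112e6fa4e89a97ea2"
	    "0a9eeee64b55d393a2192992a274fc1a836ba3c23a3feebb"
	    "d454d4423643ce80e2a9ac94fa54ca49";
	unsigned char md[SHA512_DIGEST_LENGTH];
	char hex[2 * SHA512_DIGEST_LENGTH + 1];
	int i;

	/* One-shot digest of the message. */
	SHA512((const unsigned char *)msg, strlen(msg), md);

	/* Convert the binary digest to a lowercase hex string. */
	for (i = 0; i < SHA512_DIGEST_LENGTH; i++)
		snprintf(hex + 2 * i, sizeof(hex) - 2 * i, "%02x",
		    (unsigned int)md[i]);

	if (strcmp(hex, want) != 0) {
		fprintf(stderr, "SHA-512 mismatch:\n got  %s\n want %s\n",
		    hex, want);
		return 1;
	}
	printf("SHA-512(\"abc\") matches the expected test vector\n");
	return 0;
}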