summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorjsing <>2024-12-06 11:57:18 +0000
committerjsing <>2024-12-06 11:57:18 +0000
commitd8f769ca48f14cb8455dfa8f2334c3c683502fe4 (patch)
tree4ddefe0cdee0b51074793a8db7ee53de07047a32 /src
parentc8f5ae0825fe646838447e04c2976ed4321430b6 (diff)
downloadopenbsd-d8f769ca48f14cb8455dfa8f2334c3c683502fe4.tar.gz
openbsd-d8f769ca48f14cb8455dfa8f2334c3c683502fe4.tar.bz2
openbsd-d8f769ca48f14cb8455dfa8f2334c3c683502fe4.zip
Provide a SHA-1 assembly implementation for amd64 using SHA-NI.
This provides a SHA-1 assembly implementation for amd64, which uses the Intel SHA Extensions (aka SHA New Instructions or SHA-NI). This provides a 2-2.5x performance gain on some Intel CPUs and many AMD CPUs. ok tb@
Diffstat (limited to 'src')
-rw-r--r--src/lib/libcrypto/arch/amd64/Makefile.inc3
-rw-r--r--src/lib/libcrypto/sha/sha1_amd64.c8
-rw-r--r--src/lib/libcrypto/sha/sha1_amd64_shani.S170
3 files changed, 179 insertions, 2 deletions
diff --git a/src/lib/libcrypto/arch/amd64/Makefile.inc b/src/lib/libcrypto/arch/amd64/Makefile.inc
index 33c7dbba26..f8f829cca1 100644
--- a/src/lib/libcrypto/arch/amd64/Makefile.inc
+++ b/src/lib/libcrypto/arch/amd64/Makefile.inc
@@ -1,4 +1,4 @@
1# $OpenBSD: Makefile.inc,v 1.34 2024/12/04 13:13:33 jsing Exp $ 1# $OpenBSD: Makefile.inc,v 1.35 2024/12/06 11:57:17 jsing Exp $
2 2
3# amd64-specific libcrypto build rules 3# amd64-specific libcrypto build rules
4 4
@@ -51,6 +51,7 @@ SSLASM+= rc4 rc4-x86_64
51CFLAGS+= -DSHA1_ASM 51CFLAGS+= -DSHA1_ASM
52SRCS+= sha1_amd64.c 52SRCS+= sha1_amd64.c
53SRCS+= sha1_amd64_generic.S 53SRCS+= sha1_amd64_generic.S
54SRCS+= sha1_amd64_shani.S
54CFLAGS+= -DSHA256_ASM 55CFLAGS+= -DSHA256_ASM
55SRCS+= sha256_amd64.c 56SRCS+= sha256_amd64.c
56SRCS+= sha256_amd64_generic.S 57SRCS+= sha256_amd64_generic.S
diff --git a/src/lib/libcrypto/sha/sha1_amd64.c b/src/lib/libcrypto/sha/sha1_amd64.c
index b3d4ab1263..2976cc7e6e 100644
--- a/src/lib/libcrypto/sha/sha1_amd64.c
+++ b/src/lib/libcrypto/sha/sha1_amd64.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: sha1_amd64.c,v 1.1 2024/12/04 13:13:33 jsing Exp $ */ 1/* $OpenBSD: sha1_amd64.c,v 1.2 2024/12/06 11:57:18 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2024 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -20,9 +20,15 @@
20#include "crypto_arch.h" 20#include "crypto_arch.h"
21 21
22void sha1_block_generic(SHA_CTX *ctx, const void *in, size_t num); 22void sha1_block_generic(SHA_CTX *ctx, const void *in, size_t num);
23void sha1_block_shani(SHA_CTX *ctx, const void *in, size_t num);
23 24
24void 25void
25sha1_block_data_order(SHA_CTX *ctx, const void *in, size_t num) 26sha1_block_data_order(SHA_CTX *ctx, const void *in, size_t num)
26{ 27{
28 if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_SHA) != 0) {
29 sha1_block_shani(ctx, in, num);
30 return;
31 }
32
27 sha1_block_generic(ctx, in, num); 33 sha1_block_generic(ctx, in, num);
28} 34}
diff --git a/src/lib/libcrypto/sha/sha1_amd64_shani.S b/src/lib/libcrypto/sha/sha1_amd64_shani.S
new file mode 100644
index 0000000000..d7699d10f1
--- /dev/null
+++ b/src/lib/libcrypto/sha/sha1_amd64_shani.S
@@ -0,0 +1,170 @@
1/* $OpenBSD: sha1_amd64_shani.S,v 1.1 2024/12/06 11:57:18 jsing Exp $ */
2/*
3 * Copyright (c) 2024 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#ifdef __CET__
19#include <cet.h>
20#else
21#define _CET_ENDBR
22#endif
23
24/*
25 * SHA-1 implementation using the Intel SHA extensions:
26 *
27 * https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html
28 */
29
/* Function arguments, as passed by the x86-64 ABI. */
#define	ctx		%rdi
#define	in		%rsi
#define	num		%rdx

/* Address of the first byte past the input (in + num * 64). */
#define	end		%rbx

/* Copies of the hash state taken at the start of each block. */
#define	xabcd_save	%xmm0
#define	xe_save		%xmm1

/* Working hash state: a/b/c/d packed together, e alternating between two. */
#define	xabcd		%xmm2
#define	xe0		%xmm3
#define	xe1		%xmm4

/* Sliding window over the sixteen most recent message schedule words. */
#define	xmsg0		%xmm5
#define	xmsg1		%xmm6
#define	xmsg2		%xmm7
#define	xmsg3		%xmm8

/* Byte shuffle mask applied to each 16 bytes of input as it is loaded. */
#define	xshufmask	%xmm9
49
50
/*
 * Load the 16 bytes at offset blk*16 from base into xdst (unaligned load)
 * and reorder its bytes according to xshufmask.
 */
#define sha1_message_schedule_load(blk, base, xdst) \
	movdqu	(blk*16)(base), xdst; \
	pshufb	xshufmask, xdst;

/*
 * Compute the next four message schedule words, overwriting xw0. The four
 * registers hold the previous sixteen words, with xw0 being the oldest
 * group and xw3 the most recent.
 */
#define sha1_message_schedule_update(xw0, xw1, xw2, xw3) \
	sha1msg1 xw1, xw0; \
	pxor	xw2, xw0; \
	sha1msg2 xw3, xw0;

/*
 * Perform four SHA-1 rounds using round function/constant selector fn.
 *
 * On entry xcur holds the abcd state from before the previous four rounds;
 * sha1nexte turns that into the current e and adds it to the message words.
 * The abcd state is copied to xnext before it is updated, so that the
 * following invocation can derive its e from it.
 */
#define sha1_shani_round(fn, xwords, xcur, xnext) \
	sha1nexte xwords, xcur; \
	movdqa	xabcd, xnext; \
	sha1rnds4 fn, xcur, xabcd;

/* Four rounds on message words freshly loaded from the input block. */
#define sha1_shani_round_load(fn, blk, base, xdst, xcur, xnext) \
	sha1_message_schedule_load(blk, base, xdst); \
	sha1_shani_round(fn, xdst, xcur, xnext);

/* Four rounds on message words derived via the message schedule. */
#define sha1_shani_round_update(fn, xw0, xw1, xw2, xw3, xcur, xnext) \
	sha1_message_schedule_update(xw0, xw1, xw2, xw3); \
	sha1_shani_round(fn, xw0, xcur, xnext);
72
73
74.text
75
/*
 * void sha1_block_shani(SHA_CTX *ctx, const void *in, size_t num);
 *
 * Update the SHA-1 hash state in ctx with num 64 byte blocks of input read
 * from in, using the SHA-NI instructions. The state is read from and
 * written back to ctx as 16 bytes at offset 0 (a, b, c, d) followed by a
 * 32 bit word at offset 16 (e). A num of zero leaves ctx untouched and
 * does not read from in.
 *
 * Standard x86-64 ABI: rdi = ctx, rsi = in, rdx = num
 *
 * Modifies rsi, rdx, xmm0 through xmm9 and the flags; rbx is preserved.
 */
.align 16
.globl	sha1_block_shani
.type	sha1_block_shani,@function
sha1_block_shani:
	_CET_ENDBR

	/*
	 * The block loop below tests its condition at the bottom and would
	 * hence process one block even if none were requested - return
	 * early instead, before anything has been pushed onto the stack.
	 */
	testq	num, num
	jz	.Lshani_done

	/* Save callee save registers. */
	pushq	%rbx

	/* Compute end of message. */
	shlq	$6, num
	leaq	(in, num, 1), end

	/* Load endian shuffle mask. */
	movdqa	shufmask(%rip), xshufmask

	/* Load current hash state from context. */
	movdqu	(0*16)(ctx), xabcd
	pshufd	$0x1b, xabcd, xabcd		/* dcba -> abcd */
	pxor	xe0, xe0
	pinsrd	$3, (1*16)(ctx), xe0		/* e */

	jmp	.Lshani_block_loop

.align 16
.Lshani_block_loop:
	/* Save state for accumulation. */
	movdqa	xabcd, xabcd_save
	movdqa	xe0, xe_save

	/* Rounds 0 through 15 (four rounds at a time). */
	sha1_message_schedule_load(0, in, xmsg0);
	paddd	xmsg0, xe0
	movdqa	xabcd, xe1
	sha1rnds4 $0, xe0, xabcd

	sha1_shani_round_load($0, 1, in, xmsg1, xe1, xe0);
	sha1_shani_round_load($0, 2, in, xmsg2, xe0, xe1);
	sha1_shani_round_load($0, 3, in, xmsg3, xe1, xe0);

	/* Rounds 16 through 79 (four rounds at a time). */
	sha1_shani_round_update($0, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1)
	sha1_shani_round_update($1, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0)
	sha1_shani_round_update($1, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1)
	sha1_shani_round_update($1, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0)

	sha1_shani_round_update($1, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1)
	sha1_shani_round_update($1, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0)
	sha1_shani_round_update($2, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1)
	sha1_shani_round_update($2, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0)

	sha1_shani_round_update($2, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1)
	sha1_shani_round_update($2, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0)
	sha1_shani_round_update($2, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1)
	sha1_shani_round_update($3, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0)

	sha1_shani_round_update($3, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1)
	sha1_shani_round_update($3, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0)
	sha1_shani_round_update($3, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1)
	sha1_shani_round_update($3, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0)

	/*
	 * Accumulate hash state. After the final round xe0 holds the abcd
	 * state from before the last four rounds, from which sha1nexte
	 * derives the final e and adds it to the saved e.
	 */
	paddd	xabcd_save, xabcd
	sha1nexte xe_save, xe0

	/* Advance to the next block, looping until the end of the input. */
	addq	$64, in
	cmpq	end, in
	jb	.Lshani_block_loop

	/* Update stored hash context. */
	pshufd	$0x1b, xabcd, xabcd		/* abcd -> dcba */
	movdqu	xabcd, (0*16)(ctx)
	pextrd	$3, xe0, (1*16)(ctx)		/* e */

	/* Restore callee save registers. */
	popq	%rbx

.Lshani_done:
	ret
.size	sha1_block_shani,.-sha1_block_shani
159
160.rodata
161
/*
 * Shuffle mask for pshufb - result byte i is taken from source byte 15 - i.
 * This reverses all 16 bytes, which converts each 32 bit word from big
 * endian to little endian and also reverses the order of the four words,
 * leaving the first message word in the most significant position.
 *
 * The bytes are listed in memory order (lowest address first).
 */
.align 16
.type	shufmask,@object
shufmask:
.byte	0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
.byte	0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
.size	shufmask,.-shufmask