summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjsing <>2024-11-16 15:31:36 +0000
committerjsing <>2024-11-16 15:31:36 +0000
commit23d9a213525c4cdbc5fba2b8cf4109f087fe41d3 (patch)
treeca867c18349e0ee9b5d1d2d76bade5e031c5ad2a
parentd87a513f1f66019c84942357247dbcdc0c318f2e (diff)
downloadopenbsd-23d9a213525c4cdbc5fba2b8cf4109f087fe41d3.tar.gz
openbsd-23d9a213525c4cdbc5fba2b8cf4109f087fe41d3.tar.bz2
openbsd-23d9a213525c4cdbc5fba2b8cf4109f087fe41d3.zip
Provide a SHA-256 assembly implementation for amd64 using SHA-NI.
This provides a SHA-256 assembly implementation for amd64, which uses the Intel SHA Extensions (aka SHA New Instructions or SHA-NI). This provides a 3-5x performance gain on some Intel CPUs and many AMD CPUs. ok tb@
-rw-r--r--src/lib/libcrypto/arch/amd64/Makefile.inc3
-rw-r--r--src/lib/libcrypto/sha/sha256_amd64.c10
-rw-r--r--src/lib/libcrypto/sha/sha256_amd64_shani.S209
3 files changed, 220 insertions, 2 deletions
diff --git a/src/lib/libcrypto/arch/amd64/Makefile.inc b/src/lib/libcrypto/arch/amd64/Makefile.inc
index 9ba5634f87..fe22385633 100644
--- a/src/lib/libcrypto/arch/amd64/Makefile.inc
+++ b/src/lib/libcrypto/arch/amd64/Makefile.inc
@@ -1,4 +1,4 @@
1# $OpenBSD: Makefile.inc,v 1.32 2024/11/16 14:56:39 jsing Exp $ 1# $OpenBSD: Makefile.inc,v 1.33 2024/11/16 15:31:36 jsing Exp $
2 2
3# amd64-specific libcrypto build rules 3# amd64-specific libcrypto build rules
4 4
@@ -53,6 +53,7 @@ SSLASM+= sha sha1-x86_64
53CFLAGS+= -DSHA256_ASM 53CFLAGS+= -DSHA256_ASM
54SRCS+= sha256_amd64.c 54SRCS+= sha256_amd64.c
55SRCS+= sha256_amd64_generic.S 55SRCS+= sha256_amd64_generic.S
56SRCS+= sha256_amd64_shani.S
56CFLAGS+= -DSHA512_ASM 57CFLAGS+= -DSHA512_ASM
57SRCS+= sha512_amd64.c 58SRCS+= sha512_amd64.c
58SRCS+= sha512_amd64_generic.S 59SRCS+= sha512_amd64_generic.S
diff --git a/src/lib/libcrypto/sha/sha256_amd64.c b/src/lib/libcrypto/sha/sha256_amd64.c
index f7531b340f..6c5d3e897f 100644
--- a/src/lib/libcrypto/sha/sha256_amd64.c
+++ b/src/lib/libcrypto/sha/sha256_amd64.c
@@ -1,4 +1,4 @@
1/* $OpenBSD: sha256_amd64.c,v 1.1 2024/11/08 15:09:48 jsing Exp $ */ 1/* $OpenBSD: sha256_amd64.c,v 1.2 2024/11/16 15:31:36 jsing Exp $ */
2/* 2/*
3 * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> 3 * Copyright (c) 2024 Joel Sing <jsing@openbsd.org>
4 * 4 *
@@ -17,10 +17,18 @@
17 17
18#include <openssl/sha.h> 18#include <openssl/sha.h>
19 19
20#include "crypto_arch.h"
21
20void sha256_block_generic(SHA256_CTX *ctx, const void *in, size_t num); 22void sha256_block_generic(SHA256_CTX *ctx, const void *in, size_t num);
23void sha256_block_shani(SHA256_CTX *ctx, const void *in, size_t num);
21 24
22void 25void
23sha256_block_data_order(SHA256_CTX *ctx, const void *in, size_t num) 26sha256_block_data_order(SHA256_CTX *ctx, const void *in, size_t num)
24{ 27{
28 if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_SHA) != 0) {
29 sha256_block_shani(ctx, in, num);
30 return;
31 }
32
25 sha256_block_generic(ctx, in, num); 33 sha256_block_generic(ctx, in, num);
26} 34}
diff --git a/src/lib/libcrypto/sha/sha256_amd64_shani.S b/src/lib/libcrypto/sha/sha256_amd64_shani.S
new file mode 100644
index 0000000000..df3a796b45
--- /dev/null
+++ b/src/lib/libcrypto/sha/sha256_amd64_shani.S
@@ -0,0 +1,209 @@
1/* $OpenBSD: sha256_amd64_shani.S,v 1.1 2024/11/16 15:31:36 jsing Exp $ */
2/*
3 * Copyright (c) 2024 Joel Sing <jsing@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#ifdef __CET__
19#include <cet.h>
20#else
21#define _CET_ENDBR
22#endif
23
24/*
25 * SHA-256 implementation using the Intel SHA extensions:
26 *
27 * https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html
28 */
29
30#define ctx %rdi
31#define in %rsi
32#define num %rdx
33
34#define end %rbx
35
36#define k256 %rbp
37
38#define xmsg %xmm0
39
40#define xhs0 %xmm1
41#define xhs1 %xmm2
42
43#define xabef %xmm3
44#define xcdgh %xmm4
45
46#define xmsgtmp0 %xmm6
47#define xmsgtmp1 %xmm7
48#define xmsgtmp2 %xmm8
49#define xmsgtmp3 %xmm9
50#define xmsgtmp4 %xmm10
51
52#define xshufmask %xmm11
53
54#define xtmp0 %xmm12
55
56#define sha256_message_schedule_load(idx, m, xmsgtmp) \
57 movdqu (idx*16)(m), xmsg; \
58 pshufb xshufmask, xmsg; \
59 movdqa xmsg, xmsgtmp;
60
61#define sha256_message_schedule_update(xmt0, xmt1, xmt2, xmt3) \
62 sha256msg1 xmt1, xmt0; \
63 movdqa xmt3, xmsgtmp4; \
64 palignr $4, xmt2, xmsgtmp4; \
65 paddd xmsgtmp4, xmt0; \
66 sha256msg2 xmt3, xmt0;
67
68#define sha256_shani_round(idx) \
69 paddd (idx*16)(k256), xmsg; \
70 sha256rnds2 xmsg, xhs0, xhs1; \
71 pshufd $0x0e, xmsg, xmsg; \
72 sha256rnds2 xmsg, xhs1, xhs0;
73
74#define sha256_shani_round_load(idx, m, xmsgtmp) \
75 sha256_message_schedule_load(idx, m, xmsgtmp); \
76 sha256_shani_round(idx);
77
78#define sha256_shani_round_update(idx, xmt0, xmt1, xmt2, xmt3) \
79 sha256_message_schedule_update(xmt0, xmt1, xmt2, xmt3); \
80 movdqa xmt0, xmsg; \
81 sha256_shani_round(idx);
82
83.text
84
85/*
86 * void sha256_block_shani(SHA256_CTX *ctx, const void *in, size_t num);
87 *
88 * Standard x86-64 ABI: rdi = ctx, rsi = in, rdx = num
89 */
90.align 16
91.globl sha256_block_shani
92.type sha256_block_shani,@function
93sha256_block_shani:
94 _CET_ENDBR
95
96 /* Save callee save registers. */
97 pushq %rbx
98 pushq %rbp
99
100 /* Compute end of message. */
101 shlq $6, num
102 leaq (in, num, 1), end
103
104 /* Address of SHA-256 constants. */
105 leaq K256(%rip), k256
106
107 /* Load endian shuffle mask. */
108 movdqa shufmask(%rip), xshufmask
109
110 /* Load current hash state from context. */
111 movdqu (0*16)(ctx), xhs0 /* dcba */
112 movdqu (1*16)(ctx), xhs1 /* hgfe */
113
114 /* Rearrange words to construct abef/cdgh. */
115 pshufd $0xb1, xhs0, xhs0 /* cdab */
116 pshufd $0x1b, xhs1, xhs1 /* efgh */
117 movdqa xhs0, xtmp0
118 palignr $8, xhs1, xhs0 /* abef */
119 pblendw $0xf0, xtmp0, xhs1 /* cdgh */
120
121 jmp .Lshani_block_loop
122
123.align 16
124.Lshani_block_loop:
125 /* Save state for accumulation. */
126 movdqa xhs0, xabef
127 movdqa xhs1, xcdgh
128
129 /* Rounds 0 through 15 (four rounds at a time). */
130 sha256_shani_round_load(0, in, xmsgtmp0)
131 sha256_shani_round_load(1, in, xmsgtmp1)
132 sha256_shani_round_load(2, in, xmsgtmp2)
133 sha256_shani_round_load(3, in, xmsgtmp3)
134
135 /* Rounds 16 through 63 (four rounds at a time). */
136 sha256_shani_round_update(4, xmsgtmp0, xmsgtmp1, xmsgtmp2, xmsgtmp3)
137 sha256_shani_round_update(5, xmsgtmp1, xmsgtmp2, xmsgtmp3, xmsgtmp0)
138 sha256_shani_round_update(6, xmsgtmp2, xmsgtmp3, xmsgtmp0, xmsgtmp1)
139 sha256_shani_round_update(7, xmsgtmp3, xmsgtmp0, xmsgtmp1, xmsgtmp2)
140
141 sha256_shani_round_update(8, xmsgtmp0, xmsgtmp1, xmsgtmp2, xmsgtmp3)
142 sha256_shani_round_update(9, xmsgtmp1, xmsgtmp2, xmsgtmp3, xmsgtmp0)
143 sha256_shani_round_update(10, xmsgtmp2, xmsgtmp3, xmsgtmp0, xmsgtmp1)
144 sha256_shani_round_update(11, xmsgtmp3, xmsgtmp0, xmsgtmp1, xmsgtmp2)
145
146 sha256_shani_round_update(12, xmsgtmp0, xmsgtmp1, xmsgtmp2, xmsgtmp3)
147 sha256_shani_round_update(13, xmsgtmp1, xmsgtmp2, xmsgtmp3, xmsgtmp0)
148 sha256_shani_round_update(14, xmsgtmp2, xmsgtmp3, xmsgtmp0, xmsgtmp1)
149 sha256_shani_round_update(15, xmsgtmp3, xmsgtmp0, xmsgtmp1, xmsgtmp2)
150
151 /* Accumulate hash state. */
152 paddd xabef, xhs0
153 paddd xcdgh, xhs1
154
155 addq $64, in
156 cmpq end, in
157 jb .Lshani_block_loop
158
159 /* Rearrange words to construct dcba/hgfe. */
160 pshufd $0x1b, xhs0, xhs0 /* feba */
161 pshufd $0xb1, xhs1, xhs1 /* dchg */
162 movdqa xhs0, xtmp0
163 pblendw $0xf0, xhs1, xhs0 /* dcba */
164 palignr $8, xtmp0, xhs1 /* hgfe */
165
166 /* Update stored hash context. */
167 movdqu xhs0, (0*16)(ctx)
168 movdqu xhs1, (1*16)(ctx)
169
170 /* Restore callee save registers. */
171 popq %rbp
172 popq %rbx
173
174 ret
175
176.rodata
177
178/*
179 * Shuffle mask - little endian to big endian word conversion.
180 */
181.align 16
182.type shufmask,@object
183shufmask:
184.octa 0x0c0d0e0f08090a0b0405060700010203
185.size shufmask,.-shufmask
186
187/*
188 * SHA-256 constants - see FIPS 180-4 section 4.2.2.
189 */
190.align 64
191.type K256,@object
192K256:
193.long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
194.long 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
195.long 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
196.long 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
197.long 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
198.long 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
199.long 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
200.long 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
201.long 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
202.long 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
203.long 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
204.long 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
205.long 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
206.long 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
207.long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
208.long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
209.size K256,.-K256