diff options
-rw-r--r-- | src/lib/libcrypto/arch/amd64/Makefile.inc | 9 | ||||
-rw-r--r-- | src/lib/libcrypto/sha/sha256_amd64.c | 26 | ||||
-rw-r--r-- | src/lib/libcrypto/sha/sha256_amd64_generic.S | 301 |
3 files changed, 330 insertions, 6 deletions
diff --git a/src/lib/libcrypto/arch/amd64/Makefile.inc b/src/lib/libcrypto/arch/amd64/Makefile.inc index 2f41f44381..07fcf46ed5 100644 --- a/src/lib/libcrypto/arch/amd64/Makefile.inc +++ b/src/lib/libcrypto/arch/amd64/Makefile.inc | |||
@@ -1,4 +1,4 @@ | |||
1 | # $OpenBSD: Makefile.inc,v 1.30 2024/10/18 13:36:24 jsing Exp $ | 1 | # $OpenBSD: Makefile.inc,v 1.31 2024/11/08 15:09:48 jsing Exp $ |
2 | 2 | ||
3 | # amd64-specific libcrypto build rules | 3 | # amd64-specific libcrypto build rules |
4 | 4 | ||
@@ -51,11 +51,8 @@ SSLASM+= rc4 rc4-x86_64 | |||
51 | CFLAGS+= -DSHA1_ASM | 51 | CFLAGS+= -DSHA1_ASM |
52 | SSLASM+= sha sha1-x86_64 | 52 | SSLASM+= sha sha1-x86_64 |
53 | CFLAGS+= -DSHA256_ASM | 53 | CFLAGS+= -DSHA256_ASM |
54 | SRCS+= sha256-x86_64.S | 54 | SRCS+= sha256_amd64.c |
55 | GENERATED+= sha256-x86_64.S | 55 | SRCS+= sha256_amd64_generic.S |
56 | sha256-x86_64.S: ${LCRYPTO_SRC}/sha/asm/sha512-x86_64.pl ${EXTRA_PL} | ||
57 | cd ${LCRYPTO_SRC}/sha/asm ; \ | ||
58 | /usr/bin/perl ./sha512-x86_64.pl ${.OBJDIR}/${.TARGET} | ||
59 | CFLAGS+= -DSHA512_ASM | 56 | CFLAGS+= -DSHA512_ASM |
60 | SRCS+= sha512-x86_64.S | 57 | SRCS+= sha512-x86_64.S |
61 | GENERATED+= sha512-x86_64.S | 58 | GENERATED+= sha512-x86_64.S |
diff --git a/src/lib/libcrypto/sha/sha256_amd64.c b/src/lib/libcrypto/sha/sha256_amd64.c new file mode 100644 index 0000000000..f7531b340f --- /dev/null +++ b/src/lib/libcrypto/sha/sha256_amd64.c | |||
@@ -0,0 +1,26 @@ | |||
1 | /* $OpenBSD: sha256_amd64.c,v 1.1 2024/11/08 15:09:48 jsing Exp $ */ | ||
2 | /* | ||
3 | * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> | ||
4 | * | ||
5 | * Permission to use, copy, modify, and distribute this software for any | ||
6 | * purpose with or without fee is hereby granted, provided that the above | ||
7 | * copyright notice and this permission notice appear in all copies. | ||
8 | * | ||
9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
16 | */ | ||
17 | |||
18 | #include <openssl/sha.h> | ||
19 | |||
20 | void sha256_block_generic(SHA256_CTX *ctx, const void *in, size_t num); /* implemented in assembly in sha256_amd64_generic.S */ | ||
21 | /* sha256_block_data_order() forwards ctx, in and num unchanged to sha256_block_generic(). */ |||
22 | void | ||
23 | sha256_block_data_order(SHA256_CTX *ctx, const void *in, size_t num) | ||
24 | { | ||
25 | sha256_block_generic(ctx, in, num); | ||
26 | } | ||
diff --git a/src/lib/libcrypto/sha/sha256_amd64_generic.S b/src/lib/libcrypto/sha/sha256_amd64_generic.S new file mode 100644 index 0000000000..07078fb0d5 --- /dev/null +++ b/src/lib/libcrypto/sha/sha256_amd64_generic.S | |||
@@ -0,0 +1,301 @@ | |||
1 | /* $OpenBSD: sha256_amd64_generic.S,v 1.1 2024/11/08 15:09:48 jsing Exp $ */ | ||
2 | /* | ||
3 | * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> | ||
4 | * | ||
5 | * Permission to use, copy, modify, and distribute this software for any | ||
6 | * purpose with or without fee is hereby granted, provided that the above | ||
7 | * copyright notice and this permission notice appear in all copies. | ||
8 | * | ||
9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
16 | */ | ||
17 | |||
18 | #ifdef __CET__ | ||
19 | #include <cet.h> | ||
20 | #else | ||
21 | #define _CET_ENDBR /* expands to nothing when __CET__ is not defined; otherwise expected to come from <cet.h> */ | ||
22 | #endif | ||
23 | |||
24 | #define ctx %rdi /* 1st argument; saved on the stack because %rdi is reused as round */ | ||
25 | #define in %rsi /* 2nd argument; points at the current 64 byte input block */ | ||
26 | #define num %rdx /* 3rd argument; only used to compute the end of message, %edx is later reused as tmp3 */ | ||
27 | |||
28 | #define round %rdi /* round counter, also the word index into the input block and K256 */ | ||
29 | |||
30 | #define hs0 %r8d /* hs0 through hs7 hold the eight 32 bit hash state words */ | ||
31 | #define hs1 %r9d | ||
32 | #define hs2 %r10d | ||
33 | #define hs3 %r11d | ||
34 | #define hs4 %r12d | ||
35 | #define hs5 %r13d | ||
36 | #define hs6 %r14d | ||
37 | #define hs7 %r15d | ||
38 | |||
39 | #define k256 %rbp /* address of the K256 constants */ | ||
40 | |||
41 | #define tmp0 %eax /* carries Wt from the message schedule macros into sha256_round */ | ||
42 | #define tmp1 %ebx /* tmp1 through tmp3 are scratch, clobbered by the schedule update and round macros */ | ||
43 | #define tmp2 %ecx | ||
44 | #define tmp3 %edx | ||
45 | |||
46 | /* | ||
47 | * Load message word Mt (t = round) from m into wt, byte swapping it from big endian, and store a copy in slot (idx mod 16) of the message schedule at w: | ||
48 | * | ||
49 | * Wt = Mt | ||
50 | */ | ||
51 | #define sha256_message_schedule_load(idx, m, w, wt) \ | ||
52 | movl (m, round, 4), wt; \ | ||
53 | bswapl wt; \ | ||
54 | movl wt, ((idx&0xf)*4)(w); | ||
55 | |||
56 | /* | ||
57 | * Update the 16 word circular message schedule at w and return the new Wt in wt (clobbers tmp1, tmp2 and tmp3): | ||
58 | * | ||
59 | * Wt = sigma1(W(t-2)) + W(t-7) + sigma0(W(t-15)) + W(t-16) | ||
60 | * | ||
61 | * sigma0(x) = ror(x, 7) ^ ror(x, 18) ^ (x >> 3) | ||
62 | * sigma1(x) = ror(x, 17) ^ ror(x, 19) ^ (x >> 10) | ||
63 | */ | ||
64 | #define sha256_message_schedule_update(idx, w, wt) \ | ||
65 | movl (((idx-2)&0xf)*4)(w), wt; /* sigma1: x = W(t-2) */ \ | ||
66 | movl wt, tmp1; /* sigma1 */ \ | ||
67 | rorl $(19-17), tmp1; /* sigma1 */ \ | ||
68 | xorl wt, tmp1; /* sigma1: ror(x, 2) ^ x */ \ | ||
69 | rorl $17, tmp1; /* sigma1: ror(x, 19) ^ ror(x, 17) */ \ | ||
70 | shrl $10, wt; /* sigma1: x >> 10 */ \ | ||
71 | xorl tmp1, wt; /* sigma1 */ \ | ||
72 | \ | ||
73 | addl (((idx-7)&0xf)*4)(w), wt; /* Wt-7 */ \ | ||
74 | addl (((idx-16)&0xf)*4)(w), wt; /* Wt-16 */ \ | ||
75 | \ | ||
76 | movl (((idx-15)&0xf)*4)(w), tmp2; /* sigma0: x = W(t-15) */ \ | ||
77 | movl tmp2, tmp3; /* sigma0 */ \ | ||
78 | rorl $(18-7), tmp2; /* sigma0 */ \ | ||
79 | xorl tmp3, tmp2; /* sigma0: ror(x, 11) ^ x */ \ | ||
80 | rorl $7, tmp2; /* sigma0: ror(x, 18) ^ ror(x, 7) */ \ | ||
81 | shrl $3, tmp3; /* sigma0: x >> 3 */ \ | ||
82 | xorl tmp3, tmp2; /* sigma0 */ \ | ||
83 | addl tmp2, wt; /* sigma0 */ \ | ||
84 | \ | ||
85 | movl wt, ((idx&0xf)*4)(w); /* store Wt over the slot that held W(t-16) */ | ||
86 | |||
87 | /* | ||
88 | * Compute a SHA-256 round, reading Kt from k256 at index round and incrementing round afterwards (clobbers tmp1, tmp2 and tmp3): | ||
89 | * | ||
90 | * T1 = h + Sigma1(e) + Ch(e, f, g) + Kt + Wt | ||
91 | * T2 = Sigma0(a) + Maj(a, b, c) | ||
92 | * | ||
93 | * Sigma0(x) = ror(x, 2) ^ ror(x, 13) ^ ror(x, 22) | ||
94 | * Sigma1(x) = ror(x, 6) ^ ror(x, 11) ^ ror(x, 25) | ||
95 | * Ch(x, y, z) = (x & y) ^ (~x & z) = ((y ^ z) & x) ^ z | ||
96 | * Maj(x, y, z) = (x & y) ^ (x & z) ^ (y & z) = ((y ^ z) & x) ^ (y & z) | ||
97 | * | ||
98 | * Upon completion d = d + T1, h = T1 + T2, pending rotation - callers rotate the register arguments by one position per round. NOTE(review): the idx, k and w arguments are not referenced by the expansion. | ||
99 | */ | ||
100 | #define sha256_round(idx, a, b, c, d, e, f, g, h, k, w, wt) \ | ||
101 | addl wt, h; /* T1 Wt */ \ | ||
102 | addl (k256, round, 4), h; /* T1 Kt */ \ | ||
103 | \ | ||
104 | movl e, tmp1; /* T1 Sigma1 */ \ | ||
105 | rorl $(25-11), tmp1; /* T1 Sigma1 */ \ | ||
106 | xorl e, tmp1; /* T1 Sigma1: ror(e, 14) ^ e */ \ | ||
107 | rorl $(11-6), tmp1; /* T1 Sigma1 */ \ | ||
108 | xorl e, tmp1; /* T1 Sigma1: ror(e, 19) ^ ror(e, 5) ^ e */ \ | ||
109 | rorl $6, tmp1; /* T1 Sigma1: ror(e, 25) ^ ror(e, 11) ^ ror(e, 6) */ \ | ||
110 | addl tmp1, h; /* T1 Sigma1 */ \ | ||
111 | \ | ||
112 | movl f, tmp2; /* T1 Ch */ \ | ||
113 | xorl g, tmp2; /* T1 Ch */ \ | ||
114 | andl e, tmp2; /* T1 Ch */ \ | ||
115 | xorl g, tmp2; /* T1 Ch: ((f ^ g) & e) ^ g */ \ | ||
116 | addl tmp2, h; /* T1 Ch */ \ | ||
117 | \ | ||
118 | addl h, d; /* d += T1 */ \ | ||
119 | \ | ||
120 | movl a, tmp1; /* T2 Sigma0 */ \ | ||
121 | rorl $(22-13), tmp1; /* T2 Sigma0 */ \ | ||
122 | xorl a, tmp1; /* T2 Sigma0: ror(a, 9) ^ a */ \ | ||
123 | rorl $(13-2), tmp1; /* T2 Sigma0 */ \ | ||
124 | xorl a, tmp1; /* T2 Sigma0: ror(a, 20) ^ ror(a, 11) ^ a */ \ | ||
125 | rorl $2, tmp1; /* T2 Sigma0: ror(a, 22) ^ ror(a, 13) ^ ror(a, 2) */ \ | ||
126 | addl tmp1, h; /* T2 Sigma0 */ \ | ||
127 | \ | ||
128 | movl b, tmp2; /* T2 Maj */ \ | ||
129 | xorl c, tmp2; /* T2 Maj */ \ | ||
130 | andl a, tmp2; /* T2 Maj: (b ^ c) & a */ \ | ||
131 | movl b, tmp3; /* T2 Maj */ \ | ||
132 | andl c, tmp3; /* T2 Maj: b & c */ \ | ||
133 | xorl tmp2, tmp3; /* T2 Maj */ \ | ||
134 | addl tmp3, h; /* T2 Maj */ \ | ||
135 | \ | ||
136 | addq $1, round; /* advance to the next round */ | ||
137 | |||
138 | #define sha256_round_load(idx, a, b, c, d, e, f, g, h) /* round with Wt loaded from the input block; schedule lives at %rsp, Wt travels in tmp0 */ \ | ||
139 | sha256_message_schedule_load(idx, in, %rsp, tmp0) \ | ||
140 | sha256_round(idx, a, b, c, d, e, f, g, h, k256, %rsp, tmp0) | ||
141 | |||
142 | #define sha256_round_update(idx, a, b, c, d, e, f, g, h) /* round with Wt derived from the message schedule at %rsp; Wt travels in tmp0 */ \ | ||
143 | sha256_message_schedule_update(idx, %rsp, tmp0) \ | ||
144 | sha256_round(idx, a, b, c, d, e, f, g, h, k256, %rsp, tmp0) | ||
145 | |||
146 | .text | ||
147 | |||
148 | /* | ||
149 | * void sha256_block_generic(SHA256_CTX *ctx, const void *in, size_t num); | ||
150 | * Processes num 64 byte blocks starting at in, updating the eight 32 bit hash state words at the start of ctx. NOTE(review): the block loop is bottom-tested, so one block is processed even when num is 0 - callers presumably guarantee num >= 1. | ||
151 | * Standard x86-64 ABI: rdi = ctx, rsi = in, rdx = num | ||
152 | */ | ||
153 | .align 16 | ||
154 | .globl sha256_block_generic | ||
155 | .type sha256_block_generic,@function | ||
156 | sha256_block_generic: | ||
157 | _CET_ENDBR | ||
158 | |||
159 | /* Save callee save registers (rbx, rbp and r12 through r15 are all used below). */ | ||
160 | pushq %rbx | ||
161 | pushq %rbp | ||
162 | pushq %r12 | ||
163 | pushq %r13 | ||
164 | pushq %r14 | ||
165 | pushq %r15 | ||
166 | |||
167 | /* Allocate a 64 byte aligned frame: message schedule at 0(%rsp), end of message at 64, context pointer at 64+8, original %rsp at 64+16. */ | ||
168 | movq %rsp, %rax | ||
169 | subq $(64+32), %rsp | ||
170 | andq $~63, %rsp | ||
171 | movq %rax, (64+16)(%rsp) | ||
172 | movq ctx, (64+8)(%rsp) | ||
173 | |||
174 | /* Compute and store end of message (in + num * 64). */ | ||
175 | shlq $6, num | ||
176 | leaq (in, num, 1), %rbx | ||
177 | movq %rbx, (64+0)(%rsp) | ||
178 | |||
179 | /* Address of SHA-256 constants. */ | ||
180 | leaq K256(%rip), k256 | ||
181 | |||
182 | /* Load current hash state from context. */ | ||
183 | movl (0*4)(ctx), hs0 | ||
184 | movl (1*4)(ctx), hs1 | ||
185 | movl (2*4)(ctx), hs2 | ||
186 | movl (3*4)(ctx), hs3 | ||
187 | movl (4*4)(ctx), hs4 | ||
188 | movl (5*4)(ctx), hs5 | ||
189 | movl (6*4)(ctx), hs6 | ||
190 | movl (7*4)(ctx), hs7 | ||
191 | |||
192 | jmp .Lblock_loop0 | ||
193 | |||
194 | .align 16 | ||
195 | .Lblock_loop0: | ||
196 | mov $0, round /* restart the round counter for this block; this overwrites ctx in %rdi */ | ||
197 | |||
198 | /* Round 0 through 15: message words are loaded from the input block. */ | ||
199 | sha256_round_load(0, hs0, hs1, hs2, hs3, hs4, hs5, hs6, hs7) | ||
200 | sha256_round_load(1, hs7, hs0, hs1, hs2, hs3, hs4, hs5, hs6) | ||
201 | sha256_round_load(2, hs6, hs7, hs0, hs1, hs2, hs3, hs4, hs5) | ||
202 | sha256_round_load(3, hs5, hs6, hs7, hs0, hs1, hs2, hs3, hs4) | ||
203 | sha256_round_load(4, hs4, hs5, hs6, hs7, hs0, hs1, hs2, hs3) | ||
204 | sha256_round_load(5, hs3, hs4, hs5, hs6, hs7, hs0, hs1, hs2) | ||
205 | sha256_round_load(6, hs2, hs3, hs4, hs5, hs6, hs7, hs0, hs1) | ||
206 | sha256_round_load(7, hs1, hs2, hs3, hs4, hs5, hs6, hs7, hs0) | ||
207 | sha256_round_load(8, hs0, hs1, hs2, hs3, hs4, hs5, hs6, hs7) | ||
208 | sha256_round_load(9, hs7, hs0, hs1, hs2, hs3, hs4, hs5, hs6) | ||
209 | sha256_round_load(10, hs6, hs7, hs0, hs1, hs2, hs3, hs4, hs5) | ||
210 | sha256_round_load(11, hs5, hs6, hs7, hs0, hs1, hs2, hs3, hs4) | ||
211 | sha256_round_load(12, hs4, hs5, hs6, hs7, hs0, hs1, hs2, hs3) | ||
212 | sha256_round_load(13, hs3, hs4, hs5, hs6, hs7, hs0, hs1, hs2) | ||
213 | sha256_round_load(14, hs2, hs3, hs4, hs5, hs6, hs7, hs0, hs1) | ||
214 | sha256_round_load(15, hs1, hs2, hs3, hs4, hs5, hs6, hs7, hs0) | ||
215 | |||
216 | jmp .Lblock_loop16 | ||
217 | |||
218 | .align 16 | ||
219 | .Lblock_loop16: | ||
220 | /* Round 16 through 63: 16 rounds per pass, repeated until round reaches 64. */ | ||
221 | sha256_round_update(16, hs0, hs1, hs2, hs3, hs4, hs5, hs6, hs7) | ||
222 | sha256_round_update(17, hs7, hs0, hs1, hs2, hs3, hs4, hs5, hs6) | ||
223 | sha256_round_update(18, hs6, hs7, hs0, hs1, hs2, hs3, hs4, hs5) | ||
224 | sha256_round_update(19, hs5, hs6, hs7, hs0, hs1, hs2, hs3, hs4) | ||
225 | sha256_round_update(20, hs4, hs5, hs6, hs7, hs0, hs1, hs2, hs3) | ||
226 | sha256_round_update(21, hs3, hs4, hs5, hs6, hs7, hs0, hs1, hs2) | ||
227 | sha256_round_update(22, hs2, hs3, hs4, hs5, hs6, hs7, hs0, hs1) | ||
228 | sha256_round_update(23, hs1, hs2, hs3, hs4, hs5, hs6, hs7, hs0) | ||
229 | sha256_round_update(24, hs0, hs1, hs2, hs3, hs4, hs5, hs6, hs7) | ||
230 | sha256_round_update(25, hs7, hs0, hs1, hs2, hs3, hs4, hs5, hs6) | ||
231 | sha256_round_update(26, hs6, hs7, hs0, hs1, hs2, hs3, hs4, hs5) | ||
232 | sha256_round_update(27, hs5, hs6, hs7, hs0, hs1, hs2, hs3, hs4) | ||
233 | sha256_round_update(28, hs4, hs5, hs6, hs7, hs0, hs1, hs2, hs3) | ||
234 | sha256_round_update(29, hs3, hs4, hs5, hs6, hs7, hs0, hs1, hs2) | ||
235 | sha256_round_update(30, hs2, hs3, hs4, hs5, hs6, hs7, hs0, hs1) | ||
236 | sha256_round_update(31, hs1, hs2, hs3, hs4, hs5, hs6, hs7, hs0) | ||
237 | |||
238 | cmp $64, round | ||
239 | jb .Lblock_loop16 | ||
240 | |||
241 | movq (64+8)(%rsp), ctx /* reload the context pointer; %rdi was used as the round counter */ | ||
242 | |||
243 | /* Add the previous hash state from the context to the working variables. */ | ||
244 | addl (0*4)(ctx), hs0 | ||
245 | addl (1*4)(ctx), hs1 | ||
246 | addl (2*4)(ctx), hs2 | ||
247 | addl (3*4)(ctx), hs3 | ||
248 | addl (4*4)(ctx), hs4 | ||
249 | addl (5*4)(ctx), hs5 | ||
250 | addl (6*4)(ctx), hs6 | ||
251 | addl (7*4)(ctx), hs7 | ||
252 | |||
253 | /* Store new hash state to context. */ | ||
254 | movl hs0, (0*4)(ctx) | ||
255 | movl hs1, (1*4)(ctx) | ||
256 | movl hs2, (2*4)(ctx) | ||
257 | movl hs3, (3*4)(ctx) | ||
258 | movl hs4, (4*4)(ctx) | ||
259 | movl hs5, (5*4)(ctx) | ||
260 | movl hs6, (6*4)(ctx) | ||
261 | movl hs7, (7*4)(ctx) | ||
262 | |||
263 | addq $64, in /* advance to the next block and loop until the stored end of message is reached */ | ||
264 | cmpq (64+0)(%rsp), in | ||
265 | jb .Lblock_loop0 | ||
266 | |||
267 | movq (64+16)(%rsp), %rsp /* restore the original stack pointer saved in the frame */ | ||
268 | |||
269 | /* Restore callee save registers. */ | ||
270 | popq %r15 | ||
271 | popq %r14 | ||
272 | popq %r13 | ||
273 | popq %r12 | ||
274 | popq %rbp | ||
275 | popq %rbx | ||
276 | |||
277 | ret | ||
278 | |||
279 | /* | ||
280 | * SHA-256 round constants K0 through K63 (64 32 bit words, read by sha256_round via k256 indexed by round) - see FIPS 180-4 section 4.2.2. | ||
281 | */ | ||
282 | .rodata | ||
283 | .align 64 | ||
284 | .type K256,@object | ||
285 | K256: | ||
286 | .long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 | ||
287 | .long 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 | ||
288 | .long 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 | ||
289 | .long 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 | ||
290 | .long 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc | ||
291 | .long 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da | ||
292 | .long 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 | ||
293 | .long 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 | ||
294 | .long 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 | ||
295 | .long 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 | ||
296 | .long 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 | ||
297 | .long 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 | ||
298 | .long 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 | ||
299 | .long 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 | ||
300 | .long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 | ||
301 | .long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 | ||