diff options
author | jsing <> | 2024-12-06 11:57:18 +0000 |
---|---|---|
committer | jsing <> | 2024-12-06 11:57:18 +0000 |
commit | d8f769ca48f14cb8455dfa8f2334c3c683502fe4 (patch) | |
tree | 4ddefe0cdee0b51074793a8db7ee53de07047a32 /src | |
parent | c8f5ae0825fe646838447e04c2976ed4321430b6 (diff) | |
download | openbsd-d8f769ca48f14cb8455dfa8f2334c3c683502fe4.tar.gz openbsd-d8f769ca48f14cb8455dfa8f2334c3c683502fe4.tar.bz2 openbsd-d8f769ca48f14cb8455dfa8f2334c3c683502fe4.zip |
Provide a SHA-1 assembly implementation for amd64 using SHA-NI.
This provides a SHA-1 assembly implementation for amd64, which uses
the Intel SHA Extensions (aka SHA New Instructions or SHA-NI). This
provides a 2-2.5x performance gain on some Intel CPUs and many AMD CPUs.
ok tb@
Diffstat (limited to 'src')
-rw-r--r-- | src/lib/libcrypto/arch/amd64/Makefile.inc | 3 | ||||
-rw-r--r-- | src/lib/libcrypto/sha/sha1_amd64.c | 8 | ||||
-rw-r--r-- | src/lib/libcrypto/sha/sha1_amd64_shani.S | 170 |
3 files changed, 179 insertions, 2 deletions
diff --git a/src/lib/libcrypto/arch/amd64/Makefile.inc b/src/lib/libcrypto/arch/amd64/Makefile.inc index 33c7dbba26..f8f829cca1 100644 --- a/src/lib/libcrypto/arch/amd64/Makefile.inc +++ b/src/lib/libcrypto/arch/amd64/Makefile.inc | |||
@@ -1,4 +1,4 @@ | |||
1 | # $OpenBSD: Makefile.inc,v 1.34 2024/12/04 13:13:33 jsing Exp $ | 1 | # $OpenBSD: Makefile.inc,v 1.35 2024/12/06 11:57:17 jsing Exp $ |
2 | 2 | ||
3 | # amd64-specific libcrypto build rules | 3 | # amd64-specific libcrypto build rules |
4 | 4 | ||
@@ -51,6 +51,7 @@ SSLASM+= rc4 rc4-x86_64 | |||
51 | CFLAGS+= -DSHA1_ASM | 51 | CFLAGS+= -DSHA1_ASM |
52 | SRCS+= sha1_amd64.c | 52 | SRCS+= sha1_amd64.c |
53 | SRCS+= sha1_amd64_generic.S | 53 | SRCS+= sha1_amd64_generic.S |
54 | SRCS+= sha1_amd64_shani.S | ||
54 | CFLAGS+= -DSHA256_ASM | 55 | CFLAGS+= -DSHA256_ASM |
55 | SRCS+= sha256_amd64.c | 56 | SRCS+= sha256_amd64.c |
56 | SRCS+= sha256_amd64_generic.S | 57 | SRCS+= sha256_amd64_generic.S |
diff --git a/src/lib/libcrypto/sha/sha1_amd64.c b/src/lib/libcrypto/sha/sha1_amd64.c index b3d4ab1263..2976cc7e6e 100644 --- a/src/lib/libcrypto/sha/sha1_amd64.c +++ b/src/lib/libcrypto/sha/sha1_amd64.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: sha1_amd64.c,v 1.1 2024/12/04 13:13:33 jsing Exp $ */ | 1 | /* $OpenBSD: sha1_amd64.c,v 1.2 2024/12/06 11:57:18 jsing Exp $ */ |
2 | /* | 2 | /* |
3 | * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> | 3 | * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> |
4 | * | 4 | * |
@@ -20,9 +20,15 @@ | |||
20 | #include "crypto_arch.h" | 20 | #include "crypto_arch.h" |
21 | 21 | ||
22 | void sha1_block_generic(SHA_CTX *ctx, const void *in, size_t num); | 22 | void sha1_block_generic(SHA_CTX *ctx, const void *in, size_t num); |
23 | void sha1_block_shani(SHA_CTX *ctx, const void *in, size_t num); | ||
23 | 24 | ||
24 | void | 25 | void |
25 | sha1_block_data_order(SHA_CTX *ctx, const void *in, size_t num) | 26 | sha1_block_data_order(SHA_CTX *ctx, const void *in, size_t num) |
26 | { | 27 | { |
28 | if ((crypto_cpu_caps_amd64 & CRYPTO_CPU_CAPS_AMD64_SHA) != 0) { | ||
29 | sha1_block_shani(ctx, in, num); | ||
30 | return; | ||
31 | } | ||
32 | |||
27 | sha1_block_generic(ctx, in, num); | 33 | sha1_block_generic(ctx, in, num); |
28 | } | 34 | } |
diff --git a/src/lib/libcrypto/sha/sha1_amd64_shani.S b/src/lib/libcrypto/sha/sha1_amd64_shani.S new file mode 100644 index 0000000000..d7699d10f1 --- /dev/null +++ b/src/lib/libcrypto/sha/sha1_amd64_shani.S | |||
@@ -0,0 +1,170 @@ | |||
1 | /* $OpenBSD: sha1_amd64_shani.S,v 1.1 2024/12/06 11:57:18 jsing Exp $ */ | ||
2 | /* | ||
3 | * Copyright (c) 2024 Joel Sing <jsing@openbsd.org> | ||
4 | * | ||
5 | * Permission to use, copy, modify, and distribute this software for any | ||
6 | * purpose with or without fee is hereby granted, provided that the above | ||
7 | * copyright notice and this permission notice appear in all copies. | ||
8 | * | ||
9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
16 | */ | ||
17 | |||
18 | #ifdef __CET__ | ||
19 | #include <cet.h> | ||
20 | #else | ||
21 | #define _CET_ENDBR | ||
22 | #endif | ||
23 | |||
24 | /* | ||
25 | * SHA-1 implementation using the Intel SHA extensions: | ||
26 | * | ||
27 | * https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html | ||
28 | */ | ||
29 | |||
30 | #define ctx %rdi /* first argument: hash context (state read from and written back to offsets 0 and 16) */ | ||
31 | #define in %rsi /* second argument: input pointer, advanced by 64 bytes per loop iteration */ | ||
32 | #define num %rdx /* third argument: block count, scaled to a byte count on entry */ | ||
33 | |||
34 | #define end %rbx /* in + num * 64; %rbx is pushed/popped by sha1_block_shani */ | ||
35 | |||
36 | #define xabcd_save %xmm0 /* copy of xabcd taken at the top of each block */ | ||
37 | #define xe_save %xmm1 /* copy of xe0 taken at the top of each block */ | ||
38 | |||
39 | #define xabcd %xmm2 /* working a, b, c, d state words */ | ||
40 | #define xe0 %xmm3 /* e input; alternates with xe1 between round groups */ | ||
41 | #define xe1 %xmm4 /* e input; alternates with xe0 between round groups */ | ||
42 | |||
43 | #define xmsg0 %xmm5 /* initially loaded from input bytes 0-15, later updated in place */ | ||
44 | #define xmsg1 %xmm6 /* initially loaded from input bytes 16-31, later updated in place */ | ||
45 | #define xmsg2 %xmm7 /* initially loaded from input bytes 32-47, later updated in place */ | ||
46 | #define xmsg3 %xmm8 /* initially loaded from input bytes 48-63, later updated in place */ | ||
47 | |||
48 | #define xshufmask %xmm9 /* holds the shufmask constant used by pshufb on each load */ | ||
49 | |||
50 | |||
51 | #define sha1_message_schedule_load(idx, m, xmsg) /* load 16 input bytes and byte-shuffle them */ \ | ||
52 | movdqu (idx*16)(m), xmsg; /* unaligned load from offset idx*16 of m */ \ | ||
53 | pshufb xshufmask, xmsg; /* reorder bytes according to xshufmask */ | ||
54 | |||
55 | #define sha1_message_schedule_update(xm0, xm1, xm2, xm3) /* update xm0 in place from xm0..xm3 */ \ | ||
56 | sha1msg1 xm1, xm0; \ | ||
57 | pxor xm2, xm0; \ | ||
58 | sha1msg2 xm3, xm0; | ||
59 | |||
60 | #define sha1_shani_round(fn, xmsg, xe, xe_next) /* one group of four rounds */ \ | ||
61 | sha1nexte xmsg, xe; /* combine xe with the message words in xmsg */ \ | ||
62 | movdqa xabcd, xe_next; /* keep pre-round abcd; call sites pass it as xe to the next group */ \ | ||
63 | sha1rnds4 fn, xe, xabcd; /* four rounds, fn is the immediate selector */ | ||
64 | |||
65 | #define sha1_shani_round_load(fn, idx, m, xmsg, xe, xe_next) /* load from input, then four rounds */ \ | ||
66 | sha1_message_schedule_load(idx, m, xmsg); \ | ||
67 | sha1_shani_round(fn, xmsg, xe, xe_next); | ||
68 | |||
69 | #define sha1_shani_round_update(fn, xm0, xm1, xm2, xm3, xe, xe_next) /* update xm0, then four rounds on xm0 */ \ | ||
70 | sha1_message_schedule_update(xm0, xm1, xm2, xm3); \ | ||
71 | sha1_shani_round(fn, xm0, xe, xe_next); | ||
72 | |||
73 | |||
74 | .text | ||
75 | |||
76 | /* | ||
77 | * void sha1_block_shani(SHA_CTX *ctx, const void *in, size_t num); | ||
78 | * | ||
79 | * Standard x86-64 ABI: rdi = ctx, rsi = in, rdx = num | ||
80 | */ | ||
81 | .align 16 | ||
82 | .globl sha1_block_shani | ||
83 | .type sha1_block_shani,@function | ||
84 | sha1_block_shani: | ||
85 | _CET_ENDBR | ||
86 | |||
87 | /* Save callee save registers (%rbx is used as the end-of-input pointer). */ | ||
88 | pushq %rbx | ||
89 | |||
90 | /* Compute end of message: end = in + num * 64. */ | ||
91 | shlq $6, num | ||
92 | leaq (in, num, 1), end | ||
93 | |||
94 | /* Load endian shuffle mask (16-byte aligned constant in .rodata). */ | ||
95 | movdqa shufmask(%rip), xshufmask | ||
96 | |||
97 | /* Load current hash state from context: 16 bytes at offset 0, one dword at offset 16. */ | ||
98 | movdqu (0*16)(ctx), xabcd | ||
99 | pshufd $0x1b, xabcd, xabcd /* dcba -> abcd */ | ||
100 | pxor xe0, xe0 | ||
101 | pinsrd $3, (1*16)(ctx), xe0 /* e */ | ||
102 | |||
103 | jmp .Lshani_block_loop /* jump over any padding emitted by the .align below */ | ||
104 | |||
105 | .align 16 | ||
106 | .Lshani_block_loop: | ||
107 | /* Save state for accumulation; it is added back in after the 80 rounds. */ | ||
108 | movdqa xabcd, xabcd_save | ||
109 | movdqa xe0, xe_save | ||
110 | |||
111 | /* Rounds 0 through 15 (four rounds at a time); the first group adds e with paddd instead of sha1nexte. */ | ||
112 | sha1_message_schedule_load(0, in, xmsg0); | ||
113 | paddd xmsg0, xe0 | ||
114 | movdqa xabcd, xe1 | ||
115 | sha1rnds4 $0, xe0, xabcd | ||
116 | |||
117 | sha1_shani_round_load($0, 1, in, xmsg1, xe1, xe0); | ||
118 | sha1_shani_round_load($0, 2, in, xmsg2, xe0, xe1); | ||
119 | sha1_shani_round_load($0, 3, in, xmsg3, xe1, xe0); | ||
120 | |||
121 | /* Rounds 16 through 79 (four rounds at a time); the immediate steps $0 -> $1 -> $2 -> $3 every 20 rounds. */ | ||
122 | sha1_shani_round_update($0, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1) | ||
123 | sha1_shani_round_update($1, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0) | ||
124 | sha1_shani_round_update($1, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1) | ||
125 | sha1_shani_round_update($1, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0) | ||
126 | |||
127 | sha1_shani_round_update($1, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1) | ||
128 | sha1_shani_round_update($1, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0) | ||
129 | sha1_shani_round_update($2, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1) | ||
130 | sha1_shani_round_update($2, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0) | ||
131 | |||
132 | sha1_shani_round_update($2, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1) | ||
133 | sha1_shani_round_update($2, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0) | ||
134 | sha1_shani_round_update($2, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1) | ||
135 | sha1_shani_round_update($3, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0) | ||
136 | |||
137 | sha1_shani_round_update($3, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1) | ||
138 | sha1_shani_round_update($3, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0) | ||
139 | sha1_shani_round_update($3, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1) | ||
140 | sha1_shani_round_update($3, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0) | ||
141 | |||
142 | /* Accumulate hash state: combine with the copies saved at the top of the loop (paddd for abcd, sha1nexte for e). */ | ||
143 | paddd xabcd_save, xabcd | ||
144 | sha1nexte xe_save, xe0 | ||
145 | |||
146 | addq $64, in /* advance to the next 64-byte block */ | ||
147 | cmpq end, in | ||
148 | jb .Lshani_block_loop /* bound is tested after the body, so one block is always processed - NOTE(review): presumably callers never pass num == 0; confirm */ | ||
149 | |||
150 | /* Update stored hash context (same layout as it was loaded from). */ | ||
151 | pshufd $0x1b, xabcd, xabcd /* abcd -> dcba */ | ||
152 | movdqu xabcd, (0*16)(ctx) | ||
153 | pextrd $3, xe0, (1*16)(ctx) /* e */ | ||
154 | |||
155 | /* Restore callee save registers. */ | ||
156 | popq %rbx | ||
157 | |||
158 | ret | ||
159 | |||
160 | .rodata | ||
161 | |||
162 | /* | ||
163 | * Shuffle mask - byte reversal for little endian to big endian word conversion, | ||
164 | * and reordering to abcd. | ||
165 | */ | ||
166 | .align 16 | ||
167 | .type shufmask,@object | ||
168 | shufmask: | ||
169 | .octa 0x000102030405060708090a0b0c0d0e0f | ||
170 | .size shufmask,.-shufmask | ||