diff options
author | jsing <> | 2025-01-24 13:35:04 +0000 |
---|---|---|
committer | jsing <> | 2025-01-24 13:35:04 +0000 |
commit | eb37484907f8d59aa15c1bd84262872087f909c8 (patch) | |
tree | 4a38be3aa387374f26fe17d96d1f3348009bd64c /src | |
parent | 0fc89df461764e969e114f07942a72c98b6d4e8e (diff) | |
download | openbsd-eb37484907f8d59aa15c1bd84262872087f909c8.tar.gz openbsd-eb37484907f8d59aa15c1bd84262872087f909c8.tar.bz2 openbsd-eb37484907f8d59aa15c1bd84262872087f909c8.zip |
Provide a readable assembly implementation for MD5 on amd64.
This appears to be about 5% faster than the current perlasm version on a
modern Intel CPU.
While here, rename md5_block_asm_data_order to md5_block_data_order, for
consistency with other hashes.
ok tb@
Diffstat (limited to 'src')
-rw-r--r-- | src/lib/libcrypto/arch/amd64/Makefile.inc | 4 | ||||
-rw-r--r-- | src/lib/libcrypto/md5/asm/md5-586.pl | 2 | ||||
-rwxr-xr-x | src/lib/libcrypto/md5/asm/md5-x86_64.pl | 8 | ||||
-rw-r--r-- | src/lib/libcrypto/md5/md5.c | 5 | ||||
-rw-r--r-- | src/lib/libcrypto/md5/md5_amd64_generic.S | 237 |
5 files changed, 246 insertions, 10 deletions
diff --git a/src/lib/libcrypto/arch/amd64/Makefile.inc b/src/lib/libcrypto/arch/amd64/Makefile.inc index f8f829cca1..f4410e8059 100644 --- a/src/lib/libcrypto/arch/amd64/Makefile.inc +++ b/src/lib/libcrypto/arch/amd64/Makefile.inc | |||
@@ -1,4 +1,4 @@ | |||
1 | # $OpenBSD: Makefile.inc,v 1.35 2024/12/06 11:57:17 jsing Exp $ | 1 | # $OpenBSD: Makefile.inc,v 1.36 2025/01/24 13:35:04 jsing Exp $ |
2 | 2 | ||
3 | # amd64-specific libcrypto build rules | 3 | # amd64-specific libcrypto build rules |
4 | 4 | ||
@@ -40,7 +40,7 @@ SRCS += word_clz.S | |||
40 | 40 | ||
41 | # md5 | 41 | # md5 |
42 | CFLAGS+= -DMD5_ASM | 42 | CFLAGS+= -DMD5_ASM |
43 | SSLASM+= md5 md5-x86_64 | 43 | SRCS+= md5_amd64_generic.S |
44 | # modes | 44 | # modes |
45 | CFLAGS+= -DGHASH_ASM | 45 | CFLAGS+= -DGHASH_ASM |
46 | SSLASM+= modes ghash-x86_64 | 46 | SSLASM+= modes ghash-x86_64 |
diff --git a/src/lib/libcrypto/md5/asm/md5-586.pl b/src/lib/libcrypto/md5/asm/md5-586.pl index 6cb66bb499..a039efd899 100644 --- a/src/lib/libcrypto/md5/asm/md5-586.pl +++ b/src/lib/libcrypto/md5/asm/md5-586.pl | |||
@@ -30,7 +30,7 @@ $X="esi"; | |||
30 | 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, # R3 | 30 | 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, # R3 |
31 | ); | 31 | ); |
32 | 32 | ||
33 | &md5_block("md5_block_asm_data_order"); | 33 | &md5_block("md5_block_data_order"); |
34 | &asm_finish(); | 34 | &asm_finish(); |
35 | 35 | ||
36 | sub Np | 36 | sub Np |
diff --git a/src/lib/libcrypto/md5/asm/md5-x86_64.pl b/src/lib/libcrypto/md5/asm/md5-x86_64.pl index 5001c34724..a2d97b28e3 100755 --- a/src/lib/libcrypto/md5/asm/md5-x86_64.pl +++ b/src/lib/libcrypto/md5/asm/md5-x86_64.pl | |||
@@ -125,9 +125,9 @@ $code .= <<EOF; | |||
125 | .text | 125 | .text |
126 | .align 16 | 126 | .align 16 |
127 | 127 | ||
128 | .globl md5_block_asm_data_order | 128 | .globl md5_block_data_order |
129 | .type md5_block_asm_data_order,\@function,3 | 129 | .type md5_block_data_order,\@function,3 |
130 | md5_block_asm_data_order: | 130 | md5_block_data_order: |
131 | _CET_ENDBR | 131 | _CET_ENDBR |
132 | push %rbp | 132 | push %rbp |
133 | push %rbx | 133 | push %rbx |
@@ -257,7 +257,7 @@ $code .= <<EOF; | |||
257 | add \$40,%rsp | 257 | add \$40,%rsp |
258 | .Lepilogue: | 258 | .Lepilogue: |
259 | ret | 259 | ret |
260 | .size md5_block_asm_data_order,.-md5_block_asm_data_order | 260 | .size md5_block_data_order,.-md5_block_data_order |
261 | EOF | 261 | EOF |
262 | 262 | ||
263 | print $code; | 263 | print $code; |
diff --git a/src/lib/libcrypto/md5/md5.c b/src/lib/libcrypto/md5/md5.c index 3bc558f0f2..f1c9223d86 100644 --- a/src/lib/libcrypto/md5/md5.c +++ b/src/lib/libcrypto/md5/md5.c | |||
@@ -1,4 +1,4 @@ | |||
1 | /* $OpenBSD: md5.c,v 1.24 2025/01/19 07:51:41 jsing Exp $ */ | 1 | /* $OpenBSD: md5.c,v 1.25 2025/01/24 13:35:04 jsing Exp $ */ |
2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) | 2 | /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) |
3 | * All rights reserved. | 3 | * All rights reserved. |
4 | * | 4 | * |
@@ -71,8 +71,7 @@ | |||
71 | CTASSERT(sizeof(MD5_LONG) == sizeof(uint32_t)); | 71 | CTASSERT(sizeof(MD5_LONG) == sizeof(uint32_t)); |
72 | 72 | ||
73 | #ifdef MD5_ASM | 73 | #ifdef MD5_ASM |
74 | void md5_block_asm_data_order(MD5_CTX *c, const void *p, size_t num); | 74 | void md5_block_data_order(MD5_CTX *c, const void *p, size_t num); |
75 | #define md5_block_data_order md5_block_asm_data_order | ||
76 | #endif | 75 | #endif |
77 | 76 | ||
78 | #ifndef MD5_ASM | 77 | #ifndef MD5_ASM |
diff --git a/src/lib/libcrypto/md5/md5_amd64_generic.S b/src/lib/libcrypto/md5/md5_amd64_generic.S new file mode 100644 index 0000000000..e282d56ad8 --- /dev/null +++ b/src/lib/libcrypto/md5/md5_amd64_generic.S | |||
@@ -0,0 +1,237 @@ | |||
1 | /* $OpenBSD: md5_amd64_generic.S,v 1.1 2025/01/24 13:35:04 jsing Exp $ */ | ||
2 | /* | ||
3 | * Copyright (c) 2025 Joel Sing <jsing@openbsd.org> | ||
4 | * | ||
5 | * Permission to use, copy, modify, and distribute this software for any | ||
6 | * purpose with or without fee is hereby granted, provided that the above | ||
7 | * copyright notice and this permission notice appear in all copies. | ||
8 | * | ||
9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||
10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||
11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||
12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||
13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||
14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||
15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||
16 | */ | ||
17 | |||
18 | #ifdef __CET__ | ||
19 | #include <cet.h> | ||
20 | #else | ||
21 | #define _CET_ENDBR | ||
22 | #endif | ||
23 | |||
24 | #define ctx %rdi | ||
25 | #define in %rsi | ||
26 | #define num %rdx | ||
27 | |||
28 | #define end %rbp | ||
29 | |||
30 | #define A %eax | ||
31 | #define B %ebx | ||
32 | #define C %ecx | ||
33 | #define D %edx | ||
34 | |||
35 | #define AA %r8d | ||
36 | #define BB %r9d | ||
37 | #define CC %r10d | ||
38 | #define DD %r11d | ||
39 | |||
40 | #define tmp0 %r12d | ||
41 | #define tmp1 %r13d | ||
42 | |||
43 | /* | ||
44 | * Compute MD5 round 1 as: | ||
45 | * | ||
46 | * a = b + rol(a + F(b, c, d) + x + t, s) | ||
47 | * F(x, y, z) = (x & y) | (~x & z) | ||
48 | * = ((y ^ z) & x) ^ z | ||
49 | */ | ||
50 | #define md5_round1(a, b, c, d, x, t, s) \ | ||
51 | addl (x*4)(in), a; \ | ||
52 | movl c, tmp0; \ | ||
53 | xorl d, tmp0; \ | ||
54 | andl b, tmp0; \ | ||
55 | xorl d, tmp0; \ | ||
56 | leal t(tmp0, a), a; \ | ||
57 | roll $s, a; \ | ||
58 | addl b, a; | ||
59 | |||
60 | /* | ||
61 | * Compute MD5 round 2 as: | ||
62 | * | ||
63 | * a = b + rol(a + G(b, c, d) + x + t, s) | ||
64 | * G(x, y, z) = (x & z) | (y & ~z) | ||
65 | */ | ||
66 | #define md5_round2(a, b, c, d, x, t, s) \ | ||
67 | addl (x*4)(in), a; \ | ||
68 | movl d, tmp0; \ | ||
69 | xorl $-1, tmp0; \ | ||
70 | andl c, tmp0; \ | ||
71 | addl tmp0, a; \ | ||
72 | movl d, tmp1; \ | ||
73 | andl b, tmp1; \ | ||
74 | leal t(tmp1, a), a; \ | ||
75 | roll $s, a; \ | ||
76 | addl b, a; | ||
77 | |||
78 | /* | ||
79 | * Compute MD5 round 3 as: | ||
80 | * | ||
81 | * a = b + rol(a + H(b, c, d) + x + t, s) | ||
82 | * H(x, y, z) = x ^ y ^ z; | ||
83 | */ | ||
84 | #define md5_round3(a, b, c, d, x, t, s) \ | ||
85 | addl (x*4)(in), a; \ | ||
86 | movl d, tmp0; \ | ||
87 | xorl c, tmp0; \ | ||
88 | xorl b, tmp0; \ | ||
89 | leal t(tmp0, a), a; \ | ||
90 | roll $s, a; \ | ||
91 | addl b, a; | ||
92 | |||
93 | /* | ||
94 | * Compute MD5 round 4 as: | ||
95 | * | ||
96 | * a = b + rol(a + I(b, c, d) + x + t, s) | ||
97 | * I(x, y, z) = y ^ (x | ~z) | ||
98 | */ | ||
99 | #define md5_round4(a, b, c, d, x, t, s) \ | ||
100 | addl (x*4)(in), a; \ | ||
101 | movl d, tmp0; \ | ||
102 | xorl $-1, tmp0; \ | ||
103 | orl b, tmp0; \ | ||
104 | xorl c, tmp0; \ | ||
105 | leal t(tmp0, a), a; \ | ||
106 | roll $s, a; \ | ||
107 | addl b, a; | ||
108 | |||
109 | .text | ||
110 | |||
111 | /* | ||
112 | * void md5_block_data_order(MD5_CTX *ctx, const void *in, size_t num); | ||
113 | * | ||
114 | * Standard x86-64 ABI: rdi = ctx, rsi = in, rdx = num | ||
115 | */ | ||
116 | .align 16 | ||
117 | .globl md5_block_data_order | ||
118 | .type md5_block_data_order,@function | ||
119 | md5_block_data_order: | ||
120 | _CET_ENDBR | ||
121 | |||
122 | /* Save callee save registers. */ | ||
123 | pushq %rbx | ||
124 | pushq %rbp | ||
125 | pushq %r12 | ||
126 | pushq %r13 | ||
127 | |||
128 | /* Compute end of message. */ | ||
129 | shlq $6, num | ||
130 | leaq (in, num, 1), end | ||
131 | |||
132 | /* Load current hash state from context. */ | ||
133 | movl (0*4)(ctx), AA | ||
134 | movl (1*4)(ctx), BB | ||
135 | movl (2*4)(ctx), CC | ||
136 | movl (3*4)(ctx), DD | ||
137 | |||
138 | jmp .Lblock_loop | ||
139 | |||
140 | .align 16 | ||
141 | .Lblock_loop: | ||
142 | movl AA, A | ||
143 | movl BB, B | ||
144 | movl CC, C | ||
145 | movl DD, D | ||
146 | |||
147 | md5_round1(A, B, C, D, 0, 0xd76aa478L, 7); | ||
148 | md5_round1(D, A, B, C, 1, 0xe8c7b756L, 12); | ||
149 | md5_round1(C, D, A, B, 2, 0x242070dbL, 17); | ||
150 | md5_round1(B, C, D, A, 3, 0xc1bdceeeL, 22); | ||
151 | md5_round1(A, B, C, D, 4, 0xf57c0fafL, 7); | ||
152 | md5_round1(D, A, B, C, 5, 0x4787c62aL, 12); | ||
153 | md5_round1(C, D, A, B, 6, 0xa8304613L, 17); | ||
154 | md5_round1(B, C, D, A, 7, 0xfd469501L, 22); | ||
155 | md5_round1(A, B, C, D, 8, 0x698098d8L, 7); | ||
156 | md5_round1(D, A, B, C, 9, 0x8b44f7afL, 12); | ||
157 | md5_round1(C, D, A, B, 10, 0xffff5bb1L, 17); | ||
158 | md5_round1(B, C, D, A, 11, 0x895cd7beL, 22); | ||
159 | md5_round1(A, B, C, D, 12, 0x6b901122L, 7); | ||
160 | md5_round1(D, A, B, C, 13, 0xfd987193L, 12); | ||
161 | md5_round1(C, D, A, B, 14, 0xa679438eL, 17); | ||
162 | md5_round1(B, C, D, A, 15, 0x49b40821L, 22); | ||
163 | |||
164 | md5_round2(A, B, C, D, 1, 0xf61e2562L, 5); | ||
165 | md5_round2(D, A, B, C, 6, 0xc040b340L, 9); | ||
166 | md5_round2(C, D, A, B, 11, 0x265e5a51L, 14); | ||
167 | md5_round2(B, C, D, A, 0, 0xe9b6c7aaL, 20); | ||
168 | md5_round2(A, B, C, D, 5, 0xd62f105dL, 5); | ||
169 | md5_round2(D, A, B, C, 10, 0x02441453L, 9); | ||
170 | md5_round2(C, D, A, B, 15, 0xd8a1e681L, 14); | ||
171 | md5_round2(B, C, D, A, 4, 0xe7d3fbc8L, 20); | ||
172 | md5_round2(A, B, C, D, 9, 0x21e1cde6L, 5); | ||
173 | md5_round2(D, A, B, C, 14, 0xc33707d6L, 9); | ||
174 | md5_round2(C, D, A, B, 3, 0xf4d50d87L, 14); | ||
175 | md5_round2(B, C, D, A, 8, 0x455a14edL, 20); | ||
176 | md5_round2(A, B, C, D, 13, 0xa9e3e905L, 5); | ||
177 | md5_round2(D, A, B, C, 2, 0xfcefa3f8L, 9); | ||
178 | md5_round2(C, D, A, B, 7, 0x676f02d9L, 14); | ||
179 | md5_round2(B, C, D, A, 12, 0x8d2a4c8aL, 20); | ||
180 | |||
181 | md5_round3(A, B, C, D, 5, 0xfffa3942L, 4); | ||
182 | md5_round3(D, A, B, C, 8, 0x8771f681L, 11); | ||
183 | md5_round3(C, D, A, B, 11, 0x6d9d6122L, 16); | ||
184 | md5_round3(B, C, D, A, 14, 0xfde5380cL, 23); | ||
185 | md5_round3(A, B, C, D, 1, 0xa4beea44L, 4); | ||
186 | md5_round3(D, A, B, C, 4, 0x4bdecfa9L, 11); | ||
187 | md5_round3(C, D, A, B, 7, 0xf6bb4b60L, 16); | ||
188 | md5_round3(B, C, D, A, 10, 0xbebfbc70L, 23); | ||
189 | md5_round3(A, B, C, D, 13, 0x289b7ec6L, 4); | ||
190 | md5_round3(D, A, B, C, 0, 0xeaa127faL, 11); | ||
191 | md5_round3(C, D, A, B, 3, 0xd4ef3085L, 16); | ||
192 | md5_round3(B, C, D, A, 6, 0x04881d05L, 23); | ||
193 | md5_round3(A, B, C, D, 9, 0xd9d4d039L, 4); | ||
194 | md5_round3(D, A, B, C, 12, 0xe6db99e5L, 11); | ||
195 | md5_round3(C, D, A, B, 15, 0x1fa27cf8L, 16); | ||
196 | md5_round3(B, C, D, A, 2, 0xc4ac5665L, 23); | ||
197 | |||
198 | md5_round4(A, B, C, D, 0, 0xf4292244L, 6); | ||
199 | md5_round4(D, A, B, C, 7, 0x432aff97L, 10); | ||
200 | md5_round4(C, D, A, B, 14, 0xab9423a7L, 15); | ||
201 | md5_round4(B, C, D, A, 5, 0xfc93a039L, 21); | ||
202 | md5_round4(A, B, C, D, 12, 0x655b59c3L, 6); | ||
203 | md5_round4(D, A, B, C, 3, 0x8f0ccc92L, 10); | ||
204 | md5_round4(C, D, A, B, 10, 0xffeff47dL, 15); | ||
205 | md5_round4(B, C, D, A, 1, 0x85845dd1L, 21); | ||
206 | md5_round4(A, B, C, D, 8, 0x6fa87e4fL, 6); | ||
207 | md5_round4(D, A, B, C, 15, 0xfe2ce6e0L, 10); | ||
208 | md5_round4(C, D, A, B, 6, 0xa3014314L, 15); | ||
209 | md5_round4(B, C, D, A, 13, 0x4e0811a1L, 21); | ||
210 | md5_round4(A, B, C, D, 4, 0xf7537e82L, 6); | ||
211 | md5_round4(D, A, B, C, 11, 0xbd3af235L, 10); | ||
212 | md5_round4(C, D, A, B, 2, 0x2ad7d2bbL, 15); | ||
213 | md5_round4(B, C, D, A, 9, 0xeb86d391L, 21); | ||
214 | |||
215 | /* Add intermediate state to hash state. */ | ||
216 | addl A, AA | ||
217 | addl B, BB | ||
218 | addl C, CC | ||
219 | addl D, DD | ||
220 | |||
221 | addq $64, in | ||
222 | cmpq end, in | ||
223 | jb .Lblock_loop | ||
224 | |||
225 | /* Store new hash state to context. */ | ||
226 | movl AA, (0*4)(ctx) | ||
227 | movl BB, (1*4)(ctx) | ||
228 | movl CC, (2*4)(ctx) | ||
229 | movl DD, (3*4)(ctx) | ||
230 | |||
231 | /* Restore callee save registers. */ | ||
232 | popq %r13 | ||
233 | popq %r12 | ||
234 | popq %rbp | ||
235 | popq %rbx | ||
236 | |||
237 | ret | ||