/* $OpenBSD: sha1_amd64_shani.S,v 1.1 2024/12/06 11:57:18 jsing Exp $ */
/*
* Copyright (c) 2024 Joel Sing <jsing@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

#ifdef __CET__
#include <cet.h>
#else
#define _CET_ENDBR
#endif

/*
* SHA-1 implementation using the Intel SHA extensions:
*
* https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html
*/
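
/*
 * The running hash state is kept in xabcd (a, b, c, d, with a in the most
 * significant dword) and in the top dword of xe0/xe1 (e). Each 64 byte block
 * is processed as 20 groups of four rounds; the current 16 message schedule
 * words live in xmsg0 through xmsg3 and later words are derived in place.
 */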

#define ctx         %rdi
#define in          %rsi
#define num         %rdx

#define end         %rbx

#define xabcd_save  %xmm0
#define xe_save     %xmm1
#define xabcd       %xmm2
#define xe0         %xmm3
#define xe1         %xmm4
#define xmsg0       %xmm5
#define xmsg1       %xmm6
#define xmsg2       %xmm7
#define xmsg3       %xmm8
#define xshufmask   %xmm9
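
/*
 * sha1_message_schedule_load() loads the 16 byte chunk 'idx' of the current
 * block and reverses its byte order via pshufb, so that the four big-endian
 * message words become host-order dwords with the first word in the most
 * significant dword.
 */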
#define sha1_message_schedule_load(idx, m, xmsg) \
        movdqu (idx*16)(m), xmsg; \
        pshufb xshufmask, xmsg;
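
/*
 * sha1_message_schedule_update() computes the next four schedule words,
 * W[i] = rol(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1), using the
 * sha1msg1/pxor/sha1msg2 sequence.
 */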
#define sha1_message_schedule_update(xm0, xm1, xm2, xm3) \
        sha1msg1 xm1, xm0; \
        pxor xm2, xm0; \
        sha1msg2 xm3, xm0;
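
/*
 * sha1_shani_round() performs four rounds of SHA-1: sha1nexte adds the
 * pending e value (rotating it left by 30) into the first of the four
 * message words, the current abcd state is stashed in xe_next (its top
 * dword is the value that sha1nexte will rotate into e four rounds from
 * now), and sha1rnds4 then runs the rounds. The immediate selects the
 * round function and constant: 0 = Ch/0x5a827999, 1 = Parity/0x6ed9eba1,
 * 2 = Maj/0x8f1bbcdc, 3 = Parity/0xca62c1d6.
 */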
#define sha1_shani_round(fn, xmsg, xe, xe_next) \
        sha1nexte xmsg, xe; \
        movdqa xabcd, xe_next; \
        sha1rnds4 fn, xe, xabcd;
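
/*
 * Convenience wrappers: four rounds fed by a freshly loaded or a freshly
 * updated group of message schedule words.
 */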
#define sha1_shani_round_load(fn, idx, m, xmsg, xe, xe_next) \
        sha1_message_schedule_load(idx, m, xmsg); \
        sha1_shani_round(fn, xmsg, xe, xe_next);

#define sha1_shani_round_update(fn, xm0, xm1, xm2, xm3, xe, xe_next) \
        sha1_message_schedule_update(xm0, xm1, xm2, xm3); \
        sha1_shani_round(fn, xm0, xe, xe_next);

.text

/*
 * void sha1_block_shani(SHA_CTX *ctx, const void *in, size_t num);
 *
 * Standard x86-64 ABI: rdi = ctx, rsi = in, rdx = num (the number of
 * 64 byte blocks to process, not a byte count).
 */
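
/*
 * Illustrative C-level call (a sketch only - this is an internal block
 * function, callers are expected to have checked for SHA-NI support and to
 * pass whole 64 byte blocks). SHA_CTX, SHA1_Init and SHA_CBLOCK come from
 * <openssl/sha.h>; the surrounding buffering and finalisation are assumed,
 * not shown:
 *
 *      SHA_CTX c;
 *      SHA1_Init(&c);
 *      sha1_block_shani(&c, buf, nbytes / SHA_CBLOCK);
 */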
.align 16
.globl sha1_block_shani
.type sha1_block_shani,@function
sha1_block_shani:
        _CET_ENDBR

        /* Save callee save registers. */
        pushq %rbx

        /* Compute end of message. */
        shlq $6, num
        leaq (in, num, 1), end

        /* Load endian shuffle mask. */
        movdqa shufmask(%rip), xshufmask

        /* Load current hash state from context. */
        movdqu (0*16)(ctx), xabcd
        pshufd $0x1b, xabcd, xabcd /* dcba -> abcd */
        pxor xe0, xe0
        pinsrd $3, (1*16)(ctx), xe0 /* e */

        jmp .Lshani_block_loop

.align 16
.Lshani_block_loop:
        /* Save state for accumulation. */
        movdqa xabcd, xabcd_save
        movdqa xe0, xe_save

        /* Rounds 0 through 15 (four rounds at a time). */
        sha1_message_schedule_load(0, in, xmsg0);
        paddd xmsg0, xe0
        movdqa xabcd, xe1
        sha1rnds4 $0, xe0, xabcd
        sha1_shani_round_load($0, 1, in, xmsg1, xe1, xe0);
        sha1_shani_round_load($0, 2, in, xmsg2, xe0, xe1);
        sha1_shani_round_load($0, 3, in, xmsg3, xe1, xe0);

        /* Rounds 16 through 79 (four rounds at a time). */
        sha1_shani_round_update($0, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1)
        sha1_shani_round_update($1, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0)
        sha1_shani_round_update($1, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1)
        sha1_shani_round_update($1, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0)
        sha1_shani_round_update($1, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1)
        sha1_shani_round_update($1, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0)
        sha1_shani_round_update($2, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1)
        sha1_shani_round_update($2, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0)
        sha1_shani_round_update($2, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1)
        sha1_shani_round_update($2, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0)
        sha1_shani_round_update($2, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1)
        sha1_shani_round_update($3, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0)
        sha1_shani_round_update($3, xmsg0, xmsg1, xmsg2, xmsg3, xe0, xe1)
        sha1_shani_round_update($3, xmsg1, xmsg2, xmsg3, xmsg0, xe1, xe0)
        sha1_shani_round_update($3, xmsg2, xmsg3, xmsg0, xmsg1, xe0, xe1)
        sha1_shani_round_update($3, xmsg3, xmsg0, xmsg1, xmsg2, xe1, xe0)

        /* Accumulate hash state. */
        paddd xabcd_save, xabcd
        sha1nexte xe_save, xe0

        addq $64, in
        cmpq end, in
        jb .Lshani_block_loop

        /* Update stored hash context. */
        pshufd $0x1b, xabcd, xabcd /* abcd -> dcba */
        movdqu xabcd, (0*16)(ctx)
        pextrd $3, xe0, (1*16)(ctx) /* e */

        /* Restore callee save registers. */
        popq %rbx

        ret

.rodata

/*
 * Shuffle mask: reverses all 16 bytes in a register, converting the four
 * big-endian message words to host order and reordering them so that the
 * first word ends up in the most significant dword.
 */
.align 16
.type shufmask,@object
shufmask:
.octa 0x000102030405060708090a0b0c0d0e0f
.size shufmask,.-shufmask