aboutsummaryrefslogtreecommitdiff
path: root/Asm/x86/Sha1Opt.asm
diff options
context:
space:
mode:
Diffstat (limited to 'Asm/x86/Sha1Opt.asm')
-rw-r--r--Asm/x86/Sha1Opt.asm263
1 files changed, 263 insertions, 0 deletions
diff --git a/Asm/x86/Sha1Opt.asm b/Asm/x86/Sha1Opt.asm
new file mode 100644
index 0000000..3495fd1
--- /dev/null
+++ b/Asm/x86/Sha1Opt.asm
@@ -0,0 +1,263 @@
1; Sha1Opt.asm -- SHA-1 optimized code for SHA-1 x86 hardware instructions
2; 2021-03-10 : Igor Pavlov : Public domain
3
4include 7zAsm.asm
5
6MY_ASM_START
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
CONST SEGMENT

align 16
; pshufb control mask that reverses the byte order of all 16 bytes in a
; lane. Applied to each 16-byte chunk of the message block (see LOAD_W)
; it converts the four big-endian 32-bit words of the SHA-1 input stream
; into the little-endian register layout the SHA instructions expect.
Reverse_Endian_Mask db 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0

CONST ENDS
50
; _TEXT$SHA1OPT SEGMENT 'CODE'

; 32-bit builds: enable .686/.xmm so MASM accepts SSE instructions and
; xmm register operands.
ifndef x64
  .686
  .xmm
endif

ifdef x64
  ; x64: the block count arrives in the 3rd ABI parameter register.
  rNum        equ REG_ABI_PARAM_2
  if (IS_LINUX eq 0)
    ; Win64 only: 32 bytes of locals to spill callee-saved xmm8/xmm9
    ; (xmm6/xmm7 are stored in the caller-provided home space above the
    ; return address instead — see MY_PROLOG).
    LOCAL_SIZE  equ (16 * 2)
  endif
else
  ; x86: rNum lives in r0 (eax); MY_PROLOG loads it from the stack.
  rNum        equ r0
  ; One 16-byte local slot for abcd_save (no spare xmm register in x86).
  LOCAL_SIZE  equ (16 * 1)
endif

; Hash state pointer and message data pointer: 1st and 2nd parameters.
rState  equ REG_ABI_PARAM_0
rData   equ REG_ABI_PARAM_1
71
; Emit SHA1RNDS4 xmm(a1), xmm(a2), imm as raw bytes so the file
; assembles even on assemblers that predate the SHA mnemonics.
; Encoding: 0F 3A CC /r ib with a register-register ModRM byte
; (mod=11b => C0h + reg*8 + rm); a1/a2 are xmm register NUMBERS.
MY_sha1rnds4 macro a1, a2, imm
        db 0fH, 03aH, 0ccH, (0c0H + a1 * 8 + a2), imm
endm
75
; Generic raw-byte emitter for the 0F 38 <cmd> SHA instruction family,
; register-register form: ModRM = C0h + a1*8 + a2 (a1 = dest xmm number,
; a2 = src xmm number).
MY_SHA_INSTR macro cmd, a1, a2
        db 0fH, 038H, cmd, (0c0H + a1 * 8 + a2)
endm

; Third opcode bytes of the SHA-1 helper instructions.
cmd_sha1nexte equ 0c8H
cmd_sha1msg1  equ 0c9H
cmd_sha1msg2  equ 0caH

; sha1nexte xmm(a1), xmm(a2)
MY_sha1nexte macro a1, a2
        MY_SHA_INSTR cmd_sha1nexte, a1, a2
endm

; sha1msg1 xmm(a1), xmm(a2)
MY_sha1msg1 macro a1, a2
        MY_SHA_INSTR cmd_sha1msg1, a1, a2
endm

; sha1msg2 xmm(a1), xmm(a2)
MY_sha1msg2 macro a1, a2
        MY_SHA_INSTR cmd_sha1msg2, a1, a2
endm
95
; Function prologue: save callee-saved xmm registers (Win64) or load the
; stack-passed arguments and set up an aligned local frame (x86).
MY_PROLOG macro
    ifdef x64
    if (IS_LINUX eq 0)
        ; Win64: xmm6-xmm15 are callee-saved. xmm6/xmm7 go into the
        ; caller's home space at [rsp+8..rsp+39]; on entry rsp % 16 == 8,
        ; so r4 + 8 is 16-aligned and movdqa is safe.
        movdqa  [r4 + 8], xmm6
        movdqa  [r4 + 8 + 16], xmm7
        ; Reserve LOCAL_SIZE bytes (+8 restores 16-byte alignment) and
        ; spill xmm8/xmm9 there.
        sub     r4, LOCAL_SIZE + 8
        movdqa  [r4 ], xmm8
        movdqa  [r4 + 16], xmm9
    endif
    else ; x86
    if (IS_CDECL gt 0)
        ; cdecl: all three arguments are on the stack above the return
        ; address.
        mov     rState, [r4 + REG_SIZE * 1]
        mov     rData,  [r4 + REG_SIZE * 2]
        mov     rNum,   [r4 + REG_SIZE * 3]
    else ; fastcall
        ; fastcall: rState/rData are already in registers; only the 3rd
        ; argument (block count) is on the stack.
        mov     rNum, [r4 + REG_SIZE * 1]
    endif
        ; Save the old stack pointer in r5 (callee-saved), then align the
        ; stack to 16 and carve out LOCAL_SIZE bytes for abcd_save so
        ; movdqa on the local slot is legal.
        push    r5
        mov     r5, r4
        and     r4, -16
        sub     r4, LOCAL_SIZE
    endif
endm
119
; Function epilogue: mirror of MY_PROLOG. Restores the callee-saved xmm
; registers (Win64) or the original stack pointer (x86), then ends the
; procedure via MY_ENDP.
MY_EPILOG macro
    ifdef x64
    if (IS_LINUX eq 0)
        ; Reload xmm8/xmm9 from the local frame, release it, then reload
        ; xmm6/xmm7 from the caller's home space.
        movdqa  xmm8, [r4]
        movdqa  xmm9, [r4 + 16]
        add     r4, LOCAL_SIZE + 8
        movdqa  xmm6, [r4 + 8]
        movdqa  xmm7, [r4 + 8 + 16]
    endif
    else ; x86
        ; Undo the alignment/frame setup and restore the saved register.
        mov     r4, r5
        pop     r5
    endif
        MY_ENDP
endm
135
136
; xmm register allocation (by register NUMBER, because the raw-byte
; instruction emitters need numbers, not names):
;   xmm0/xmm1 (e0/e1) - the two alternating E-value registers
;   xmm2      (abcd)  - working a,b,c,d state
;   xmm3      (e0_save) - E value saved at the start of each block
;   xmm4..xmm7 (w_regs+0..3) - rolling 16-word message-schedule window
e0_N      equ 0
e1_N      equ 1
abcd_N    equ 2
e0_save_N equ 3
w_regs    equ 4

; Symbolic names built from the numbers above.
e0      equ @CatStr(xmm, %e0_N)
e1      equ @CatStr(xmm, %e1_N)
abcd    equ @CatStr(xmm, %abcd_N)
e0_save equ @CatStr(xmm, %e0_save_N)


ifdef x64
  ; x64 has xmm8+: keep the block-start abcd and the byte-swap mask in
  ; registers for the whole call.
  abcd_save equ xmm8
  mask2     equ xmm9
else
  ; x86 has only xmm0-xmm7: abcd_save spills to the 16-byte local slot,
  ; and the mask shares e1 (so it must be reloaded every block, after
  ; the rounds have clobbered e1 - see the main loop).
  abcd_save equ [r4]
  mask2     equ e1
endif

; Load the big-endian -> little-endian pshufb mask into mask2.
LOAD_MASK macro
        movdqa  mask2, XMMWORD PTR Reverse_Endian_Mask
endm
160
; Load message chunk k (k = 0..3) of the current 64-byte block into
; w_regs+k (unaligned load - rData has no alignment guarantee) and
; byte-swap its four 32-bit words to little-endian via mask2.
LOAD_W macro k:req
        movdqu  @CatStr(xmm, %(w_regs + k)), [rData + (16 * (k))]
        pshufb  @CatStr(xmm, %(w_regs + k)), mask2
endm
165
166
; Software-pipelining depth of the message-schedule computation inside
; RND4: schedule work for group k+pre1 / k+pre2 is issued during group k
; to overlap with the sha1rnds4 latency.
; pre2 can be 2 or 3 (recommended)
pre2 equ 3
pre1 equ (pre2 + 1)

; 80 SHA-1 rounds = 20 groups of 4 rounds each.
NUM_ROUNDS4 equ 20
172
; One group of 4 SHA-1 rounds plus pipelined message-schedule updates
; for later groups. k is the compile-time group index (0..19); the macro
; is expanded NUM_ROUNDS4 times by the rept in the main loop.
; The two E registers alternate with the parity of k.
RND4 macro k
        ; Save the current abcd: sha1nexte below derives the next
        ; group's E addend from its A lane.
        movdqa  @CatStr(xmm, %(e0_N + ((k + 1) mod 2))), abcd

        ; 4 rounds on abcd with E input from this group's E register;
        ; immediate k/5 (0..3) selects the round function/constant for
        ; each of the four 20-round stages.
        MY_sha1rnds4 abcd_N, (e0_N + (k mod 2)), k / 5

        ; Source register number for the next E value: normally the next
        ; group's 4 message words; for the final group it is e0_save so
        ; the block-start E is folded into the result (feed-forward).
        nextM = (w_regs + ((k + 1) mod 4))
        if (k EQ NUM_ROUNDS4 - 1)
          nextM = e0_save_N
        endif

        ; Compute the next group's E register from the saved abcd copy
        ; and nextM.
        MY_sha1nexte (e0_N + ((k + 1) mod 2)), nextM

        ; Message-schedule expansion over the rolling 4-register window,
        ; issued pre1/pre2 groups ahead of their use:
        ; W[i] = (W[i-3] xor W[i-8] xor W[i-14] xor W[i-16]) rol 1,
        ; realized as sha1msg1 (started pre1 ahead) + pxor + sha1msg2
        ; (finished pre2 ahead). The range guards skip groups whose
        ; schedule words are loaded directly or never needed.
        if (k GE (4 - pre2)) AND (k LT (NUM_ROUNDS4 - pre2))
          pxor @CatStr(xmm, %(w_regs + ((k + pre2) mod 4))), @CatStr(xmm, %(w_regs + ((k + pre2 - 2) mod 4)))
        endif

        if (k GE (4 - pre1)) AND (k LT (NUM_ROUNDS4 - pre1))
          MY_sha1msg1 (w_regs + ((k + pre1) mod 4)), (w_regs + ((k + pre1 - 3) mod 4))
        endif

        if (k GE (4 - pre2)) AND (k LT (NUM_ROUNDS4 - pre2))
          MY_sha1msg2 (w_regs + ((k + pre2) mod 4)), (w_regs + ((k + pre2 - 1) mod 4))
        endif
endm
197
198
; Reverse the dword order of the state registers (shuffle control 1Bh =
; 0,1,2,3 -> 3,2,1,0). Converts between the memory layout of state[]
; (a in the low dword) and the lane order the SHA instructions operate
; on; applying it twice restores the original order, so the same macro
; is used on entry and on exit.
REVERSE_STATE macro
                                ; abcd   ; dcba
                                ; e0     ; 000e
        pshufd  abcd, abcd, 01bH        ; abcd
        pshufd  e0, e0, 01bH            ; e000
endm
205
206
207
208
209
; Sha1_UpdateBlocks_HW(state, data, numBlocks)
;   rState - pointer to the hash state: 5 little-endian dwords a,b,c,d,e
;   rData  - pointer to numBlocks * 64 bytes of message data (unaligned ok)
;   rNum   - number of 64-byte blocks; 0 is handled (returns immediately)
; Processes the blocks with the SHA-NI instructions (caller must have
; verified CPU support) and updates the state in place.
MY_PROC Sha1_UpdateBlocks_HW, 3
        MY_PROLOG

        cmp     rNum, 0
        je      end_c

        ; Load the state: a,b,c,d into abcd (a in the low dword => "dcba"),
        ; e into the low dword of e0 ("000e").
        movdqu  abcd, [rState]                  ; dcba
        movd    e0, dword ptr [rState + 16]     ; 000e

        ; Put the dwords into the lane order the SHA instructions use.
        REVERSE_STATE

    ifdef x64
        ; x64: mask2 is xmm9 and survives the whole call - load it once.
        LOAD_MASK
    endif

    align 16
    nextBlock:
        ; Snapshot the state entering this block for the feed-forward
        ; addition at the end of the 80 rounds.
        movdqa  abcd_save, abcd
        movdqa  e0_save, e0

    ifndef x64
        ; x86: mask2 aliases e1, which the rounds clobber - reload the
        ; byte-swap mask for every block.
        LOAD_MASK
    endif

        ; Load and byte-swap the 16 message words into w_regs+0..3.
        LOAD_W 0
        LOAD_W 1
        LOAD_W 2
        LOAD_W 3

        ; Seed round group 0: add W[0..3] to the E register up front
        ; (later groups get their W added via sha1nexte inside RND4).
        paddd   e0, @CatStr(xmm, %(w_regs))

        ; Unroll all 20 groups of 4 rounds at assembly time.
        k = 0
        rept NUM_ROUNDS4
          RND4 k
          k = k + 1
        endm

        ; Feed-forward for a,b,c,d (E was folded in by the last RND4,
        ; which used e0_save as the sha1nexte source).
        paddd   abcd, abcd_save

        ; Advance to the next 64-byte block.
        add     rData, 64
        sub     rNum, 1
        jnz     nextBlock

        ; Restore the original dword order and store the updated state.
        REVERSE_STATE

        movdqu  [rState], abcd
        movd    dword ptr [rState + 16], e0

  end_c:
MY_EPILOG
260
261; _TEXT$SHA1OPT ENDS
262
263end