diff options
author | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2021-12-27 00:00:00 +0000 |
---|---|---|
committer | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2022-03-18 15:35:13 +0500 |
commit | f19f813537c7aea1c20749c914e756b54a9c3cf5 (patch) | |
tree | 816ba62ca7c0fa19f2eb46d9e9d6f7dd7c3a744d /Asm/x86/Sha1Opt.asm | |
parent | 98e06a519b63b81986abe76d28887f6984a7732b (diff) | |
download | 7zip-21.07.tar.gz 7zip-21.07.tar.bz2 7zip-21.07.zip |
'21.07'21.07
Diffstat (limited to 'Asm/x86/Sha1Opt.asm')
-rw-r--r-- | Asm/x86/Sha1Opt.asm | 263 |
1 files changed, 263 insertions, 0 deletions
diff --git a/Asm/x86/Sha1Opt.asm b/Asm/x86/Sha1Opt.asm new file mode 100644 index 0000000..3495fd1 --- /dev/null +++ b/Asm/x86/Sha1Opt.asm | |||
@@ -0,0 +1,263 @@ | |||
1 | ; Sha1Opt.asm -- SHA-1 code optimized for the x86 SHA hardware instructions | |
2 | ; 2021-03-10 : Igor Pavlov : Public domain | ||
3 | |||
4 | include 7zAsm.asm | ||
5 | |||
6 | MY_ASM_START | ||
7 | |||
8 | |||
9 | |||
10 | |||
11 | |||
12 | |||
13 | |||
14 | |||
15 | |||
16 | |||
17 | |||
18 | |||
19 | |||
20 | |||
21 | |||
22 | |||
; ---------------------------------------------------------------------------
; Read-only data
; ---------------------------------------------------------------------------
CONST SEGMENT

; Reverse_Endian_Mask -- 16-byte pshufb control vector.
; Control byte i holds the value (15 - i), so pshufb with this mask reverses
; the byte order of a whole 16-byte vector. LOAD_MASK loads it into mask2 and
; LOAD_W applies it to every 16-byte piece of the input block.
; The table is aligned to 16 bytes because LOAD_MASK reads it with movdqa.
        align 16
Reverse_Endian_Mask     db 15, 14, 13, 12
                        db 11, 10,  9,  8
                        db  7,  6,  5,  4
                        db  3,  2,  1,  0

CONST ENDS
50 | |||
51 | ; _TEXT$SHA1OPT SEGMENT 'CODE' | ||
52 | |||
; ---------------------------------------------------------------------------
; Target-dependent settings
;
;   rNum       register that holds the number of 64-byte blocks still to be
;              processed (decremented once per block in Sha1_UpdateBlocks_HW)
;   LOCAL_SIZE bytes of stack scratch space reserved by MY_PROLOG
;   rState     register that holds the pointer to the hash state
;   rData      register that holds the pointer to the input data
;
; REG_ABI_PARAM_0/1/2, r0 and IS_LINUX are not defined in this file
; (presumably they come from 7zAsm.asm).
; ---------------------------------------------------------------------------
ifdef x64
        rNum        equ REG_ABI_PARAM_2
    if (IS_LINUX eq 0)
        ; two 16-byte slots: MY_PROLOG stores xmm8 and xmm9 there
        LOCAL_SIZE  equ (16 * 2)
    endif
else
        ; 32-bit build: allow P6-class and SSE (XMM) instructions
        .686
        .xmm
        rNum        equ r0
        ; one 16-byte slot: abcd_save lives at [r4]
        LOCAL_SIZE  equ (16 * 1)
endif

rState  equ REG_ABI_PARAM_0
rData   equ REG_ABI_PARAM_1
71 | |||
; MY_sha1rnds4 a1, a2, imm
; Emits the raw bytes of "sha1rnds4 xmm<a1>, xmm<a2>, imm" (0F 3A CC /r ib),
; so the file can be built by assemblers that do not know the SHA extension.
;   a1  : number of the destination XMM register (ModRM.reg field)
;   a2  : number of the source XMM register      (ModRM.rm field)
;   imm : immediate byte of the instruction
; The ModRM byte is built as 0C0h + a1 * 8 + a2 (register-direct form) and no
; REX prefix is emitted, so both register numbers must be in the range 0..7.
; Macro arguments are substituted as text; each one is wrapped in parentheses
; so that an expression argument (for example "w_regs + 1") is evaluated as a
; unit before it is multiplied or added.
MY_sha1rnds4 macro a1, a2, imm
        db 0fH, 03aH, 0ccH, (0c0H + (a1) * 8 + (a2)), (imm)
endm
75 | |||
; MY_SHA_INSTR cmd, a1, a2
; Emits the raw bytes of a two-operand SHA instruction of the form
; 0F 38 <cmd> /r with register-direct operands xmm<a1>, xmm<a2>.
;   cmd : third opcode byte (one of the cmd_sha1* constants)
;   a1  : number of the destination XMM register (ModRM.reg field)
;   a2  : number of the source XMM register      (ModRM.rm field)
; The ModRM byte is built as 0C0h + a1 * 8 + a2 and no REX prefix is emitted,
; so both register numbers must be in the range 0..7.
; Macro arguments are substituted as text; each one is wrapped in parentheses
; so that an expression argument is evaluated as a unit before it is
; multiplied or added.
MY_SHA_INSTR macro cmd, a1, a2
        db 0fH, 038H, (cmd), (0c0H + (a1) * 8 + (a2))
endm
79 | |||
; Third opcode byte (following 0F 38) that MY_SHA_INSTR emits for each of the
; two-operand SHA-1 instructions.
cmd_sha1nexte   equ 0c8H
cmd_sha1msg1    equ 0c9H
cmd_sha1msg2    equ 0caH
83 | |||
; MY_sha1nexte dst_n, src_n
; Emits "sha1nexte xmm<dst_n>, xmm<src_n>" through MY_SHA_INSTR.
; Both arguments are XMM register numbers.
MY_sha1nexte macro dst_n, src_n
        MY_SHA_INSTR cmd_sha1nexte, dst_n, src_n
endm
87 | |||
; MY_sha1msg1 dst_n, src_n
; Emits "sha1msg1 xmm<dst_n>, xmm<src_n>" through MY_SHA_INSTR.
; Both arguments are XMM register numbers.
MY_sha1msg1 macro dst_n, src_n
        MY_SHA_INSTR cmd_sha1msg1, dst_n, src_n
endm
91 | |||
; MY_sha1msg2 dst_n, src_n
; Emits "sha1msg2 xmm<dst_n>, xmm<src_n>" through MY_SHA_INSTR.
; Both arguments are XMM register numbers.
MY_sha1msg2 macro dst_n, src_n
        MY_SHA_INSTR cmd_sha1msg2, dst_n, src_n
endm
95 | |||
; MY_PROLOG -- entry sequence expanded at the top of Sha1_UpdateBlocks_HW.
;
; r4, r5, REG_SIZE, IS_LINUX and IS_CDECL are not defined in this file
; (presumably they come from 7zAsm.asm). The code below uses r4 as the stack
; pointer and r5 as the register that remembers its entry value.
;
;   x86              Fetches the stack-passed arguments (all three for cdecl,
;                    only rNum otherwise), saves r5, copies r4 to r5, then
;                    rounds r4 down to a multiple of 16 and reserves
;                    LOCAL_SIZE bytes. After that [r4] (abcd_save) is 16-byte
;                    aligned.
;   x64, IS_LINUX=0  Stores xmm6/xmm7 at [r4 + 8] and [r4 + 24], lowers r4 by
;                    LOCAL_SIZE + 8 and stores xmm8/xmm9 at [r4] and [r4 + 16].
;                    NOTE(review): movdqa requires 16-byte aligned addresses,
;                    so this assumes r4 = 16 * n + 8 on entry and that the
;                    32 bytes at [r4 + 8] are the caller-provided Win64 home
;                    space -- confirm. Whether MY_PROC emits unwind data for
;                    this frame is not visible here.
;   x64, otherwise   Expands to nothing.
MY_PROLOG macro
  ifndef x64
        ; ---- x86: arguments are read relative to the entry value of r4 ----
    if (IS_CDECL gt 0)
        mov     rState, [r4 + REG_SIZE * 1]
        mov     rData,  [r4 + REG_SIZE * 2]
        mov     rNum,   [r4 + REG_SIZE * 3]
    else
        ; fastcall: only the block count is taken from the stack
        mov     rNum,   [r4 + REG_SIZE * 1]
    endif
        push    r5
        mov     r5, r4                  ; MY_EPILOG restores r4 from r5
        and     r4, -16                 ; align the scratch area to 16 bytes
        sub     r4, LOCAL_SIZE
  else
    if (IS_LINUX eq 0)
        ; ---- x64, IS_LINUX = 0: preserve xmm6..xmm9 ----
        movdqa  [r4 + 8], xmm6
        movdqa  [r4 + 8 + 16], xmm7
        sub     r4, LOCAL_SIZE + 8
        movdqa  [r4     ], xmm8
        movdqa  [r4 + 16], xmm9
    endif
  endif
endm
119 | |||
; MY_EPILOG -- exit sequence expanded at the end of Sha1_UpdateBlocks_HW.
; Undoes MY_PROLOG step by step and then expands MY_ENDP (defined outside this
; file; presumably it emits the return and closes the procedure).
;
;   x86              Restores r4 from r5 and pops the saved r5.
;   x64, IS_LINUX=0  Reloads xmm8/xmm9 from [r4] and [r4 + 16], raises r4 by
;                    LOCAL_SIZE + 8, then reloads xmm6/xmm7 from [r4 + 8] and
;                    [r4 + 24].
;   x64, otherwise   Only MY_ENDP is expanded.
MY_EPILOG macro
  ifndef x64
        ; ---- x86 ----
        mov     r4, r5
        pop     r5
  else
    if (IS_LINUX eq 0)
        ; ---- x64, IS_LINUX = 0 ----
        movdqa  xmm8, [r4]
        movdqa  xmm9, [r4 + 16]
        add     r4, LOCAL_SIZE + 8
        movdqa  xmm6, [r4 + 8]
        movdqa  xmm7, [r4 + 8 + 16]
    endif
  endif
    MY_ENDP
endm
135 | |||
136 | |||
; ---------------------------------------------------------------------------
; Register roles
;
; The *_N symbols are XMM register numbers; they are what the MY_sha1* macros
; take as arguments. The symbols without the suffix are the matching register
; names, built with @CatStr, for use in ordinary instructions.
;
;   e0, e1     the two registers that alternate as the E operand in RND4
;   abcd       working state register
;   e0_save    copy of e0 taken at the start of each block
;   w_regs     number of the first of four consecutive message registers
;              (w_regs + 0 .. w_regs + 3)
; ---------------------------------------------------------------------------
e0_N        equ 0
e1_N        equ 1
abcd_N      equ 2
e0_save_N   equ 3
w_regs      equ 4

e0          equ @CatStr(xmm, %e0_N)
e1          equ @CatStr(xmm, %e1_N)
abcd        equ @CatStr(xmm, %abcd_N)
e0_save     equ @CatStr(xmm, %e0_save_N)


;   abcd_save  copy of abcd taken at the start of each block
;   mask2      holds the byte-reversal mask used by LOAD_W
ifndef x64
        ; x86: the copy lives in the aligned stack slot set up by MY_PROLOG,
        ; and the mask shares a register with e1
        abcd_save   equ [r4]
        mask2       equ e1
else
        ; x64: both live in their own registers
        abcd_save   equ xmm8
        mask2       equ xmm9
endif
156 | |||
; LOAD_MASK
; Loads the 16-byte Reverse_Endian_Mask table into mask2 with an aligned load.
LOAD_MASK macro
        movdqa  mask2, XMMWORD PTR Reverse_Endian_Mask
endm
160 | |||
; LOAD_W widx
; Loads 16 bytes from [rData + 16 * widx] (unaligned load) into message
; register xmm<w_regs + widx> and shuffles them with pshufb using mask2.
;   widx : required; the caller in this file passes 0, 1, 2 and 3
LOAD_W macro widx:req
        movdqu  @CatStr(xmm, %(w_regs + widx)), [rData + (16 * (widx))]
        pshufb  @CatStr(xmm, %(w_regs + widx)), mask2
endm
165 | |||
166 | |||
; Scheduling parameters of RND4.
;
; pre2 : lead, counted in RND4 invocations, with which the pxor + sha1msg2
;        step of a message vector is emitted: invocation k emits it for
;        vector (k + pre2). It can be 2 or 3; 3 is the recommended value.
; pre1 : the same lead for the sha1msg1 step; always one more than pre2.
pre2            equ 3
pre1            equ (pre2 + 1)

; Number of RND4 invocations per 64-byte block (each one emits one sha1rnds4).
NUM_ROUNDS4     equ 20
172 | |||
; RND4 k
; Emits the code for round group k (k = 0 .. NUM_ROUNDS4 - 1): one sha1rnds4,
; the sha1nexte that prepares the E operand of the following group, and the
; message-schedule instructions that are due at this point.
; Registers xmm<e0_N + 0> and xmm<e0_N + 1> alternate: group k consumes
; xmm<e0_N + (k mod 2)> and prepares xmm<e0_N + ((k + 1) mod 2)>.
187 | RND4 macro k | |
; Copy abcd into the other E register before sha1rnds4 overwrites abcd.
188 | movdqa @CatStr(xmm, %(e0_N + ((k + 1) mod 2))), abcd | |
; The immediate k / 5 takes the values 0..3, each one for five consecutive
; groups (it selects the round function/constant; see the Intel SDM).
189 | MY_sha1rnds4 abcd_N, (e0_N + (k mod 2)), k / 5 | |
190 | |||
; nextM = number of the register that sha1nexte combines with the new E value:
; normally the message register (k + 1) mod 4, i.e. the one used by the next
; group.
191 | nextM = (w_regs + ((k + 1) mod 4)) | |
192 | |||
; In the last group there is no next message vector; e0_save (the E value from
; the start of the block) is used instead.
193 | if (k EQ NUM_ROUNDS4 - 1) | |
194 | nextM = e0_save_N | |
195 | endif | |
196 | |||
197 | MY_sha1nexte (e0_N + ((k + 1) mod 2)), nextM | |
198 | |||
; Message schedule, step 2 of 3 for vector (k + pre2): pxor with the register
; that holds vector (k + pre2 - 2). Emitted only while 4 <= k + pre2 <
; NUM_ROUNDS4 (vectors 0..3 are loaded by LOAD_W).
199 | if (k GE (4 - pre2)) AND (k LT (NUM_ROUNDS4 - pre2)) | |
200 | pxor @CatStr(xmm, %(w_regs + ((k + pre2) mod 4))), @CatStr(xmm, %(w_regs + ((k + pre2 - 2) mod 4))) | |
201 | endif | |
202 | |||
; Message schedule, step 1 of 3 for vector (k + pre1): sha1msg1 on the register
; that currently holds vector (k + pre1 - 4), with vector (k + pre1 - 3) as the
; source. Emitted only while 4 <= k + pre1 < NUM_ROUNDS4.
203 | if (k GE (4 - pre1)) AND (k LT (NUM_ROUNDS4 - pre1)) | |
204 | MY_sha1msg1 (w_regs + ((k + pre1) mod 4)), (w_regs + ((k + pre1 - 3) mod 4)) | |
205 | endif | |
206 | |||
; Message schedule, step 3 of 3 for vector (k + pre2): sha1msg2 with vector
; (k + pre2 - 1) as the source. Same condition as the pxor step above.
207 | if (k GE (4 - pre2)) AND (k LT (NUM_ROUNDS4 - pre2)) | |
208 | MY_sha1msg2 (w_regs + ((k + pre2) mod 4)), (w_regs + ((k + pre2 - 1) mod 4)) | |
209 | endif | |
210 | endm | |
197 | |||
198 | |||
; REVERSE_STATE
; Reverses the order of the four dwords of abcd and of e0 (pshufd with
; selector 01bH). Applying it twice restores the original order; the procedure
; uses it once after loading the state and once before storing it.
REVERSE_STATE macro
        ; on entry:  abcd = dcba,  e0 = 000e
        pshufd  abcd, abcd, 01bH        ; abcd
        pshufd  e0, e0, 01bH            ; e000
endm
205 | |||
206 | |||
207 | |||
208 | |||
209 | |||
; Sha1_UpdateBlocks_HW
; Updates a SHA-1 state with rNum consecutive 64-byte blocks using the SHA
; instructions.
;   rState : pointer to the state; 16 bytes at [rState] plus one dword at
;            [rState + 16] are read on entry and written back on exit
;   rData  : pointer to the input; advanced by 64 per block, read with
;            unaligned loads
;   rNum   : number of blocks; if it is 0 the state is not touched
; MY_PROC is defined outside this file; its second argument (3) is presumably
; the number of parameters -- confirm in 7zAsm.asm.
224 | MY_PROC Sha1_UpdateBlocks_HW, 3 | |
225 | MY_PROLOG | |
226 | |||
; Nothing to do for zero blocks: go straight to the epilog.
227 | cmp rNum, 0 | |
228 | je end_c | |
229 | |||
230 | movdqu abcd, [rState] ; dcba | |
231 | movd e0, dword ptr [rState + 16] ; 000e | |
232 | |||
; Put the state words into the dword order used inside the loop.
233 | REVERSE_STATE | |
234 | |||
; x64: mask2 has its own register, so the mask is loaded once, outside the loop.
235 | ifdef x64 | |
236 | LOAD_MASK | |
237 | endif | |
238 | |||
239 | align 16 | |
240 | nextBlock: | |
; Remember the state at the start of the block; it is added back after the
; rounds (abcd_save by the paddd below, e0_save by the sha1nexte of the last
; RND4).
241 | movdqa abcd_save, abcd | |
242 | movdqa e0_save, e0 | |
243 | |||
; x86: mask2 is the same register as e1, which RND4 overwrites, so the mask has
; to be reloaded for every block.
244 | ifndef x64 | |
245 | LOAD_MASK | |
246 | endif | |
247 | |||
; Load the 64-byte block as four byte-reversed 16-byte vectors.
248 | LOAD_W 0 | |
249 | LOAD_W 1 | |
250 | LOAD_W 2 | |
251 | LOAD_W 3 | |
252 | |||
; Add the first message vector to e0 before the first sha1rnds4.
253 | paddd e0, @CatStr(xmm, %(w_regs)) | |
; Unroll RND4 for k = 0 .. NUM_ROUNDS4 - 1 at assembly time.
254 | k = 0 | |
255 | rept NUM_ROUNDS4 | |
256 | RND4 k | |
257 | k = k + 1 | |
258 | endm | |
259 | |||
260 | paddd abcd, abcd_save | |
261 | |||
262 | |||
; Advance to the next block; loop while blocks remain.
263 | add rData, 64 | |
264 | sub rNum, 1 | |
265 | jnz nextBlock | |
266 | |||
; Restore the in-memory dword order and write the state back.
267 | REVERSE_STATE | |
268 | |||
269 | movdqu [rState], abcd | |
270 | movd dword ptr [rState + 16], e0 | |
271 | |||
272 | end_c: | |
273 | MY_EPILOG | |
274 | |||
275 | ; _TEXT$SHA1OPT ENDS | |
276 | |||
277 | end | |