diff options
Diffstat (limited to 'Asm/x86/Sha256Opt.asm')
-rw-r--r-- | Asm/x86/Sha256Opt.asm | 263 |
1 files changed, 263 insertions, 0 deletions
diff --git a/Asm/x86/Sha256Opt.asm b/Asm/x86/Sha256Opt.asm new file mode 100644 index 0000000..5d02c90 --- /dev/null +++ b/Asm/x86/Sha256Opt.asm | |||
@@ -0,0 +1,263 @@ | |||
1 | ; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions | ||
2 | ; 2021-03-10 : Igor Pavlov : Public domain | ||
3 | |||
4 | include 7zAsm.asm | ||
5 | |||
6 | MY_ASM_START | ||
7 | |||
8 | ; .data | ||
9 | ; public K | ||
10 | |||
11 | ; we can use external SHA256_K_ARRAY defined in Sha256.c | ||
12 | ; but we must guarantee that SHA256_K_ARRAY is 16-byte aligned | ||
13 | |||
14 | COMMENT @ | ||
15 | ifdef x64 | ||
16 | K_CONST equ SHA256_K_ARRAY | ||
17 | else | ||
18 | K_CONST equ _SHA256_K_ARRAY | ||
19 | endif | ||
20 | EXTRN K_CONST:xmmword | ||
21 | @ | ||
22 | |||
; Read-only data for the SHA-256 code below. Both tables are 16-byte aligned
; because they are read with movdqa (see LOAD_MASK and RND4).
23 | CONST SEGMENT | ||
24 | |||
25 | align 16 | ||
; pshufb control mask: reverses the byte order inside each of the four dwords.
; LOAD_W applies it to every 16 bytes of input data.
26 | Reverse_Endian_Mask db 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 | ||
27 | |||
28 | ; COMMENT @ | ||
29 | align 16 | ||
; Table of 64 dword round constants, four per row (16 rows).
; RND4 k reads row k as one xmmword from [K_CONST + k * 16].
30 | K_CONST \ | ||
31 | DD 0428a2f98H, 071374491H, 0b5c0fbcfH, 0e9b5dba5H | ||
32 | DD 03956c25bH, 059f111f1H, 0923f82a4H, 0ab1c5ed5H | ||
33 | DD 0d807aa98H, 012835b01H, 0243185beH, 0550c7dc3H | ||
34 | DD 072be5d74H, 080deb1feH, 09bdc06a7H, 0c19bf174H | ||
35 | DD 0e49b69c1H, 0efbe4786H, 00fc19dc6H, 0240ca1ccH | ||
36 | DD 02de92c6fH, 04a7484aaH, 05cb0a9dcH, 076f988daH | ||
37 | DD 0983e5152H, 0a831c66dH, 0b00327c8H, 0bf597fc7H | ||
38 | DD 0c6e00bf3H, 0d5a79147H, 006ca6351H, 014292967H | ||
39 | DD 027b70a85H, 02e1b2138H, 04d2c6dfcH, 053380d13H | ||
40 | DD 0650a7354H, 0766a0abbH, 081c2c92eH, 092722c85H | ||
41 | DD 0a2bfe8a1H, 0a81a664bH, 0c24b8b70H, 0c76c51a3H | ||
42 | DD 0d192e819H, 0d6990624H, 0f40e3585H, 0106aa070H | ||
43 | DD 019a4c116H, 01e376c08H, 02748774cH, 034b0bcb5H | ||
44 | DD 0391c0cb3H, 04ed8aa4aH, 05b9cca4fH, 0682e6ff3H | ||
45 | DD 0748f82eeH, 078a5636fH, 084c87814H, 08cc70208H | ||
46 | DD 090befffaH, 0a4506cebH, 0bef9a3f7H, 0c67178f2H | ||
47 | ; @ | ||
48 | |||
49 | CONST ENDS | ||
50 | |||
51 | ; _TEXT$SHA256OPT SEGMENT 'CODE' | ||
52 | |||
; Target setup and parameter register names.
; 32-bit build only: enable the .686 and .xmm instruction sets in the assembler.
53 | ifndef x64 | ||
54 | .686 | ||
55 | .xmm | ||
56 | endif | ||
57 | |||
; rNum       - third parameter (block counter in Sha256_UpdateBlocks_HW).
; LOCAL_SIZE - bytes of stack reserved by MY_PROLOG:
;                x64, IS_LINUX eq 0 : 32 bytes (save slots for xmm8 and xmm9)
;                x64, otherwise     : not defined (MY_PROLOG reserves nothing)
;                x86                : 16 bytes (the [r4] slot used as state0_save)
; REG_ABI_PARAM_*, r0 and IS_LINUX are not defined in this file
; (presumably they come from the included 7zAsm.asm - confirm).
58 | ifdef x64 | ||
59 | rNum equ REG_ABI_PARAM_2 | ||
60 | if (IS_LINUX eq 0) | ||
61 | LOCAL_SIZE equ (16 * 2) | ||
62 | endif | ||
63 | else | ||
64 | rNum equ r0 | ||
65 | LOCAL_SIZE equ (16 * 1) | ||
66 | endif | ||
67 | |||
; rState - first parameter: address of the 32-byte state that is loaded and stored.
; rData  - second parameter: address of the input data, consumed 64 bytes per block.
68 | rState equ REG_ABI_PARAM_0 | ||
69 | rData equ REG_ABI_PARAM_1 | ||
70 | |||
71 | |||
72 | |||
73 | |||
74 | |||
75 | |||
; MY_SHA_INSTR cmd, a1, a2
; Emits a 4-byte instruction as raw bytes: 0F 38 <cmd> <ModRM>.
;   cmd - third opcode byte (one of the cmd_sha256* values)
;   a1  - xmm register number placed in the ModRM reg field (a1 * 8)
;   a2  - xmm register number placed in the ModRM r/m field
; 0c0H selects the register-register ModRM form. No REX prefix is emitted,
; so a1 and a2 must be in the range 0..7.
76 | MY_SHA_INSTR macro cmd, a1, a2 | ||
77 | db 0fH, 038H, cmd, (0c0H + a1 * 8 + a2) | ||
78 | endm | ||
79 | |||
; Third opcode byte passed to MY_SHA_INSTR for each SHA-256 instruction.
80 | cmd_sha256rnds2 equ 0cbH | ||
81 | cmd_sha256msg1 equ 0ccH | ||
82 | cmd_sha256msg2 equ 0cdH | ||
83 | |||
; MY_sha256rnds2 a1, a2
; Emits sha256rnds2 with xmm register numbers a1 (ModRM reg) and a2 (ModRM r/m).
; Arguments are register NUMBERS (0..7), not register names.
; The instruction also reads xmm0 implicitly; RND4 keeps that value in msg.
84 | MY_sha256rnds2 macro a1, a2 | ||
85 | MY_SHA_INSTR cmd_sha256rnds2, a1, a2 | ||
86 | endm | ||
87 | |||
; MY_sha256msg1 a1, a2
; Emits sha256msg1 with xmm register numbers a1 (ModRM reg) and a2 (ModRM r/m).
; Arguments are register NUMBERS (0..7), not register names.
88 | MY_sha256msg1 macro a1, a2 | ||
89 | MY_SHA_INSTR cmd_sha256msg1, a1, a2 | ||
90 | endm | ||
91 | |||
; MY_sha256msg2 a1, a2
; Emits sha256msg2 with xmm register numbers a1 (ModRM reg) and a2 (ModRM r/m).
; Arguments are register NUMBERS (0..7), not register names.
92 | MY_sha256msg2 macro a1, a2 | ||
93 | MY_SHA_INSTR cmd_sha256msg2, a1, a2 | ||
94 | endm | ||
95 | |||
; MY_PROLOG
; Function entry code for Sha256_UpdateBlocks_HW; must be paired with MY_EPILOG.
; r4 is used as the stack pointer and r5 as a scratch/frame register here
; (the rN aliases are defined outside this file).
;
; x64, IS_LINUX eq 0:
;   Saves xmm6..xmm9, all of which this file modifies (xmm6/xmm7 as message
;   registers, xmm8/xmm9 as state0_save/mask2).
;   xmm6/xmm7 go to [r4 + 8] and [r4 + 24] relative to the entry r4,
;   presumably the caller-provided shadow space above the return address - confirm.
;   r4 is then lowered by LOCAL_SIZE + 8 and xmm8/xmm9 are stored at [r4], [r4 + 16].
;   All four stores are movdqa, so they rely on r4 being 8 (mod 16) at entry.
;   NOTE(review): r4 is changed here; whether unwind data is emitted depends on
;   MY_PROC, which is not visible in this file.
; x64, otherwise: emits nothing.
; x86:
;   Loads parameters from the stack (all three for cdecl, only rNum for fastcall),
;   saves r5, keeps the original r4 in r5, then aligns r4 down to 16 bytes and
;   reserves LOCAL_SIZE bytes so that [r4] is an aligned 16-byte slot.
96 | MY_PROLOG macro | ||
97 | ifdef x64 | ||
98 | if (IS_LINUX eq 0) | ||
99 | movdqa [r4 + 8], xmm6 | ||
100 | movdqa [r4 + 8 + 16], xmm7 | ||
101 | sub r4, LOCAL_SIZE + 8 | ||
102 | movdqa [r4 ], xmm8 | ||
103 | movdqa [r4 + 16], xmm9 | ||
104 | endif | ||
105 | else ; x86 | ||
106 | if (IS_CDECL gt 0) | ||
107 | mov rState, [r4 + REG_SIZE * 1] | ||
108 | mov rData, [r4 + REG_SIZE * 2] | ||
109 | mov rNum, [r4 + REG_SIZE * 3] | ||
110 | else ; fastcall | ||
111 | mov rNum, [r4 + REG_SIZE * 1] | ||
112 | endif | ||
; parameters must be read before this point: r4 changes below
113 | push r5 | ||
114 | mov r5, r4 | ||
115 | and r4, -16 | ||
116 | sub r4, LOCAL_SIZE | ||
117 | endif | ||
118 | endm | ||
119 | |||
; MY_EPILOG
; Function exit code; undoes MY_PROLOG in reverse order and then invokes MY_ENDP.
; x64, IS_LINUX eq 0: reloads xmm8/xmm9 from [r4], [r4 + 16], releases
;   LOCAL_SIZE + 8 bytes, then reloads xmm6/xmm7 from [r4 + 8], [r4 + 24].
; x64, otherwise: emits nothing before MY_ENDP.
; x86: restores r4 from r5 (dropping the aligned local area) and pops r5.
; MY_ENDP is defined outside this file (presumably it returns and closes the
; procedure opened by MY_PROC - confirm).
120 | MY_EPILOG macro | ||
121 | ifdef x64 | ||
122 | if (IS_LINUX eq 0) | ||
123 | movdqa xmm8, [r4] | ||
124 | movdqa xmm9, [r4 + 16] | ||
125 | add r4, LOCAL_SIZE + 8 | ||
126 | movdqa xmm6, [r4 + 8] | ||
127 | movdqa xmm7, [r4 + 8 + 16] | ||
128 | endif | ||
129 | else ; x86 | ||
130 | mov r4, r5 | ||
131 | pop r5 | ||
132 | endif | ||
133 | MY_ENDP | ||
134 | endm | ||
135 | |||
136 | |||
; Register roles.
;   msg, tmp    - both xmm0: msg holds K + W for sha256rnds2; tmp is scratch
;                 (RND4 and REVERSE_STATE overwrite it)
;   state0_N    - register number of state0 (xmm2), as passed to the MY_sha256* macros
;   state1_N    - register number of state1 (xmm3)
;   w_regs      - number of the first of four message registers (xmm4..xmm7)
137 | msg equ xmm0 | ||
138 | tmp equ xmm0 | ||
139 | state0_N equ 2 | ||
140 | state1_N equ 3 | ||
141 | w_regs equ 4 | ||
142 | |||
143 | |||
; state1_save - copy of state1 taken at the start of each block
144 | state1_save equ xmm1 | ||
145 | state0 equ @CatStr(xmm, %state0_N) | ||
146 | state1 equ @CatStr(xmm, %state1_N) | ||
147 | |||
148 | |||
; state0_save - copy of state0 taken at the start of each block
; mask2       - byte-swap mask for pshufb
; x64: both live in xmm8/xmm9 for the whole call.
; x86: state0_save is the aligned stack slot [r4] reserved by MY_PROLOG, and
;      mask2 shares xmm0 with msg/tmp, so it has to be reloaded for every block.
149 | ifdef x64 | ||
150 | state0_save equ xmm8 | ||
151 | mask2 equ xmm9 | ||
152 | else | ||
153 | state0_save equ [r4] | ||
154 | mask2 equ xmm0 | ||
155 | endif | ||
156 | |||
; LOAD_MASK
; Loads the 16-byte Reverse_Endian_Mask into mask2 (aligned load).
157 | LOAD_MASK macro | ||
158 | movdqa mask2, XMMWORD PTR Reverse_Endian_Mask | ||
159 | endm | ||
160 | |||
; LOAD_W k   (k is required; the caller uses 0..3)
; Loads 16 bytes from [rData + 16 * k] into xmm(w_regs + k) with an unaligned
; load, then reverses the byte order of each dword using mask2.
; mask2 must already hold Reverse_Endian_Mask (see LOAD_MASK).
161 | LOAD_W macro k:req | ||
162 | movdqu @CatStr(xmm, %(w_regs + k)), [rData + (16 * (k))] | ||
163 | pshufb @CatStr(xmm, %(w_regs + k)), mask2 | ||
164 | endm | ||
165 | |||
166 | |||
; Message-schedule look-ahead used by RND4, counted in RND4 steps:
;   in step k, sha256msg1 targets message register (k + pre1) mod 4 and the
;   palignr/paddd/sha256msg2 sequence targets message register (k + pre2) mod 4.
; Constraints on the two values:
167 | ; pre1 <= 4 && pre2 >= 1 && pre1 > pre2 && (pre1 - pre2) <= 1 | ||
168 | pre1 equ 3 | ||
169 | pre2 equ 2 | ||
170 | |||
171 | |||
172 | |||
; RND4 k
; One step of four rounds, plus message-schedule work for later steps.
;   k - step index; the caller expands it for k = 0..15. It must be an
;       assembly-time constant, because it is used in if / mod expressions.
; The four message registers xmm(w_regs)..xmm(w_regs + 3) are used as a ring
; indexed by (k + n) mod 4.
; Modifies: msg/tmp (xmm0), state0, state1, and up to two message registers.
173 | RND4 macro k | ||
; msg = row k of K_CONST + the message register for this step
174 | movdqa msg, xmmword ptr [K_CONST + (k) * 16] | ||
175 | paddd msg, @CatStr(xmm, %(w_regs + ((k + 0) mod 4))) | ||
; first two rounds: updates state0 from state1 (msg is read implicitly as xmm0)
176 | MY_sha256rnds2 state0_N, state1_N | ||
; move the upper two dwords of msg into the lower two for the next two rounds
177 | pshufd msg, msg, 0eH | ||
178 | |||
; with pre1 = 3 this part is emitted for k = 1..12
179 | if (k GE (4 - pre1)) AND (k LT (16 - pre1)) | ||
180 | ; w4[0] = msg1(w4[-4], w4[-3]) | ||
181 | MY_sha256msg1 (w_regs + ((k + pre1) mod 4)), (w_regs + ((k + pre1 - 3) mod 4)) | ||
182 | endif | ||
183 | |||
; last two rounds: updates state1 from state0
184 | MY_sha256rnds2 state1_N, state0_N | ||
185 | |||
; with pre2 = 2 this part is emitted for k = 2..13.
; tmp shares xmm0 with msg; msg is no longer needed once the rounds above are done.
186 | if (k GE (4 - pre2)) AND (k LT (16 - pre2)) | ||
; tmp = upper three dwords of the older register followed by the low dword
; of the newer one (palignr by 4 bytes); it is added into the target register
187 | movdqa tmp, @CatStr(xmm, %(w_regs + ((k + pre2 - 1) mod 4))) | ||
188 | palignr tmp, @CatStr(xmm, %(w_regs + ((k + pre2 - 2) mod 4))), 4 | ||
189 | paddd @CatStr(xmm, %(w_regs + ((k + pre2) mod 4))), tmp | ||
190 | ; w4[0] = msg2(w4[0], w4[-1]) | ||
191 | MY_sha256msg2 %(w_regs + ((k + pre2) mod 4)), %(w_regs + ((k + pre2 - 1) mod 4)) | ||
192 | endif | ||
193 | endm | ||
194 | |||
195 | |||
196 | |||
197 | |||
198 | |||
; REVERSE_STATE
; Rearranges the eight state dwords held in state0/state1 between the layout
; loaded from memory (state0 = dcba, state1 = hgfe) and the layout used by the
; round code (state0 = cdgh, state1 = abef). The line comments list dwords from
; the highest to the lowest lane.
; Applying the macro a second time restores the original layout, which is how
; Sha256_UpdateBlocks_HW converts back before storing the state.
; Modifies tmp (xmm0).
199 | REVERSE_STATE macro | ||
200 | ; state0 ; dcba | ||
201 | ; state1 ; hgfe | ||
202 | pshufd tmp, state0, 01bH ; abcd | ||
203 | pshufd state0, state1, 01bH ; efgh | ||
204 | movdqa state1, state0 ; efgh | ||
205 | punpcklqdq state0, tmp ; cdgh | ||
206 | punpckhqdq state1, tmp ; abef | ||
207 | endm | ||
208 | |||
209 | |||
; Sha256_UpdateBlocks_HW - processes rNum consecutive 64-byte blocks with the
; SHA-256 hardware instructions and updates the state in place.
; Parameters (3, see MY_PROC):
;   rState - address of the 32-byte state; read and written with unaligned
;            moves, so no alignment is required
;   rData  - address of the input; advanced by 64 per block, read unaligned
;   rNum   - number of blocks; if 0 the state is left untouched
; Presumably matches a C prototype of the form (state, data, numBlocks) - confirm
; against the C caller.
; Modifies: xmm0..xmm7, rData, rNum, flags; on x64 also xmm8/xmm9 (saved and
; restored by MY_PROLOG / MY_EPILOG when IS_LINUX eq 0).
210 | MY_PROC Sha256_UpdateBlocks_HW, 3 | ||
211 | MY_PROLOG | ||
212 | |||
; nothing to do for zero blocks; end_c still runs the epilog
213 | cmp rNum, 0 | ||
214 | je end_c | ||
215 | |||
216 | movdqu state0, [rState] ; dcba | ||
217 | movdqu state1, [rState + 16] ; hgfe | ||
218 | |||
; switch to the register layout used by the round code
219 | REVERSE_STATE | ||
220 | |||
; x64: mask2 has its own register (xmm9), so load it once outside the loop
221 | ifdef x64 | ||
222 | LOAD_MASK | ||
223 | endif | ||
224 | |||
225 | align 16 | ||
226 | nextBlock: | ||
; keep the incoming state; it is added back after the 16 RND4 steps
; (on x86 state0_save is the aligned stack slot [r4])
227 | movdqa state0_save, state0 | ||
228 | movdqa state1_save, state1 | ||
229 | |||
; x86: mask2 shares xmm0 with msg/tmp, which RND4 overwrites, so reload per block
230 | ifndef x64 | ||
231 | LOAD_MASK | ||
232 | endif | ||
233 | |||
; load the 64-byte block into xmm4..xmm7, byte-swapping every dword
234 | LOAD_W 0 | ||
235 | LOAD_W 1 | ||
236 | LOAD_W 2 | ||
237 | LOAD_W 3 | ||
238 | |||
239 | |||
; assembly-time unrolling: 16 expansions of RND4 with k = 0..15
240 | k = 0 | ||
241 | rept 16 | ||
242 | RND4 k | ||
243 | k = k + 1 | ||
244 | endm | ||
245 | |||
; add the state saved at the start of this block
246 | paddd state0, state0_save | ||
247 | paddd state1, state1_save | ||
248 | |||
; advance to the next block; sub sets ZF when the counter reaches zero
249 | add rData, 64 | ||
250 | sub rNum, 1 | ||
251 | jnz nextBlock | ||
252 | |||
; convert back to the memory layout before storing
253 | REVERSE_STATE | ||
254 | |||
255 | movdqu [rState], state0 | ||
256 | movdqu [rState + 16], state1 | ||
257 | |||
258 | end_c: | ||
259 | MY_EPILOG | ||
260 | |||
261 | ; _TEXT$SHA256OPT ENDS | ||
262 | |||
263 | end | ||