Diffstat (limited to 'Asm/x86/Sha256Opt.asm')
-rw-r--r--  Asm/x86/Sha256Opt.asm  263
1 file changed, 263 insertions, 0 deletions
diff --git a/Asm/x86/Sha256Opt.asm b/Asm/x86/Sha256Opt.asm
new file mode 100644
index 0000000..5d02c90
--- /dev/null
+++ b/Asm/x86/Sha256Opt.asm
@@ -0,0 +1,263 @@
; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions
; 2021-03-10 : Igor Pavlov : Public domain

include 7zAsm.asm

MY_ASM_START

; .data
; public K

; we can use the external SHA256_K_ARRAY defined in Sha256.c,
; but we must guarantee that SHA256_K_ARRAY is 16-byte aligned

COMMENT @
ifdef x64
K_CONST equ SHA256_K_ARRAY
else
K_CONST equ _SHA256_K_ARRAY
endif
EXTRN K_CONST:xmmword
@
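
; if the external table were used, the C side would need something like the
; following sketch (hypothetical declaration, not the actual Sha256.c code;
; C11 alignas assumed):
;
;   #include <stdalign.h>
;   #include <stdint.h>
;   alignas(16) const uint32_t SHA256_K_ARRAY[64] = {
;       0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
;       /* ... the remaining 60 constants, same values as K_CONST below */
;   };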

CONST SEGMENT

align 16
Reverse_Endian_Mask db 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12
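; pshufb with this mask byte-swaps each 32-bit lane, converting the
; big-endian message words of a SHA-256 block to little-endian order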

; COMMENT @
align 16
K_CONST \
DD 0428a2f98H, 071374491H, 0b5c0fbcfH, 0e9b5dba5H
DD 03956c25bH, 059f111f1H, 0923f82a4H, 0ab1c5ed5H
DD 0d807aa98H, 012835b01H, 0243185beH, 0550c7dc3H
DD 072be5d74H, 080deb1feH, 09bdc06a7H, 0c19bf174H
DD 0e49b69c1H, 0efbe4786H, 00fc19dc6H, 0240ca1ccH
DD 02de92c6fH, 04a7484aaH, 05cb0a9dcH, 076f988daH
DD 0983e5152H, 0a831c66dH, 0b00327c8H, 0bf597fc7H
DD 0c6e00bf3H, 0d5a79147H, 006ca6351H, 014292967H
DD 027b70a85H, 02e1b2138H, 04d2c6dfcH, 053380d13H
DD 0650a7354H, 0766a0abbH, 081c2c92eH, 092722c85H
DD 0a2bfe8a1H, 0a81a664bH, 0c24b8b70H, 0c76c51a3H
DD 0d192e819H, 0d6990624H, 0f40e3585H, 0106aa070H
DD 019a4c116H, 01e376c08H, 02748774cH, 034b0bcb5H
DD 0391c0cb3H, 04ed8aa4aH, 05b9cca4fH, 0682e6ff3H
DD 0748f82eeH, 078a5636fH, 084c87814H, 08cc70208H
DD 090befffaH, 0a4506cebH, 0bef9a3f7H, 0c67178f2H
; @

CONST ENDS

; _TEXT$SHA256OPT SEGMENT 'CODE'

ifndef x64
    .686
    .xmm
endif

ifdef x64
    rNum equ REG_ABI_PARAM_2
    if (IS_LINUX eq 0)
        LOCAL_SIZE equ (16 * 2)
    endif
else
    rNum equ r0
    LOCAL_SIZE equ (16 * 1)
endif

rState equ REG_ABI_PARAM_0
rData  equ REG_ABI_PARAM_1



MY_SHA_INSTR macro cmd, a1, a2
    db 0fH, 038H, cmd, (0c0H + a1 * 8 + a2)
endm
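; Example: "MY_SHA_INSTR cmd_sha256rnds2, 2, 3" emits the bytes
; 0F 38 CB D3 (ModRM = 0c0H + 2*8 + 3 = 0d3H), i.e. sha256rnds2 xmm2, xmm3.
; Raw bytes keep the file assembling on toolchains that lack the SHA
; mnemonics; note that without a REX prefix only xmm0-xmm7 are encodable
; this way.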

cmd_sha256rnds2 equ 0cbH
cmd_sha256msg1  equ 0ccH
cmd_sha256msg2  equ 0cdH

MY_sha256rnds2 macro a1, a2
    MY_SHA_INSTR cmd_sha256rnds2, a1, a2
endm

MY_sha256msg1 macro a1, a2
    MY_SHA_INSTR cmd_sha256msg1, a1, a2
endm

MY_sha256msg2 macro a1, a2
    MY_SHA_INSTR cmd_sha256msg2, a1, a2
endm
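
; For reference, a C-level sketch of the three instructions, assuming a
; compiler with SHA-NI intrinsics (<immintrin.h>); illustration only, not
; code used by this file (variable names are hypothetical):
;
;   #include <immintrin.h>
;   // two rounds: arg1 = {C,D,G,H}, arg2 = {A,B,E,F}, k_w = K[i] + W[i]
;   // (the implicit xmm0 operand); the result is the updated {A,B,E,F}
;   cdgh = _mm_sha256rnds2_epu32(cdgh, abef, k_w);  // 0F 38 CB /r
;   // message-schedule helpers used to derive W[16..63]
;   w0   = _mm_sha256msg1_epu32(w0, w1);            // 0F 38 CC /r
;   w3   = _mm_sha256msg2_epu32(w3, w2);            // 0F 38 CD /r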

MY_PROLOG macro
    ifdef x64
    if (IS_LINUX eq 0)
        movdqa  [r4 + 8], xmm6
        movdqa  [r4 + 8 + 16], xmm7
        sub     r4, LOCAL_SIZE + 8
        movdqa  [r4], xmm8
        movdqa  [r4 + 16], xmm9
    endif
    else ; x86
    if (IS_CDECL gt 0)
        mov     rState, [r4 + REG_SIZE * 1]
        mov     rData,  [r4 + REG_SIZE * 2]
        mov     rNum,   [r4 + REG_SIZE * 3]
    else ; fastcall
        mov     rNum,   [r4 + REG_SIZE * 1]
    endif
        push    r5
        mov     r5, r4
        and     r4, -16
        sub     r4, LOCAL_SIZE
    endif
endm
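
; Win64 treats xmm6-xmm15 as callee-saved, so the non-Linux x64 path spills
; xmm6/xmm7 into the caller-allocated home space at [r4 + 8] (16-byte aligned
; at function entry) and xmm8/xmm9 into freshly reserved stack; the System V
; x86-64 ABI has no non-volatile xmm registers, so Linux needs no spills.
; In x86 fastcall, rState/rData already arrive in ecx/edx, so only rNum is
; fetched from the stack; r4 (esp) is then realigned to 16 bytes so that
; movdqa on the [r4] local is legal.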

MY_EPILOG macro
    ifdef x64
    if (IS_LINUX eq 0)
        movdqa  xmm8, [r4]
        movdqa  xmm9, [r4 + 16]
        add     r4, LOCAL_SIZE + 8
        movdqa  xmm6, [r4 + 8]
        movdqa  xmm7, [r4 + 8 + 16]
    endif
    else ; x86
        mov     r4, r5
        pop     r5
    endif
    MY_ENDP
endm


msg      equ xmm0
tmp      equ xmm0
state0_N equ 2
state1_N equ 3
w_regs   equ 4


state1_save equ xmm1
state0      equ @CatStr(xmm, %state0_N)
state1      equ @CatStr(xmm, %state1_N)


ifdef x64
    state0_save equ xmm8
    mask2       equ xmm9
else
    state0_save equ [r4]
    mask2       equ xmm0
endif
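
; 32-bit mode has only xmm0-xmm7, so state0_save lives in the 16-byte-aligned
; stack slot reserved by MY_PROLOG, and mask2 must share xmm0 with msg/tmp;
; RND4 clobbers xmm0, which is why the x86 path reloads the mask every block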

LOAD_MASK macro
    movdqa  mask2, XMMWORD PTR Reverse_Endian_Mask
endm

LOAD_W macro k:req
    movdqu  @CatStr(xmm, %(w_regs + k)), [rData + (16 * (k))]
    pshufb  @CatStr(xmm, %(w_regs + k)), mask2
endm
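
; C sketch of LOAD_W (illustration only, <immintrin.h> assumed): an unaligned
; 16-byte load of four message words, then a per-lane byte swap:
;
;   __m128i w = _mm_loadu_si128((const __m128i *)(data + 16 * k));
;   w = _mm_shuffle_epi8(w, mask2);    // mask2 = Reverse_Endian_Mask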


; pre1 <= 4 && pre2 >= 1 && pre1 > pre2 && (pre1 - pre2) <= 1
pre1 equ 3
pre2 equ 2


RND4 macro k
    movdqa  msg, xmmword ptr [K_CONST + (k) * 16]
    paddd   msg, @CatStr(xmm, %(w_regs + ((k + 0) mod 4)))
    MY_sha256rnds2 state0_N, state1_N
    pshufd  msg, msg, 0eH

    if (k GE (4 - pre1)) AND (k LT (16 - pre1))
    ; w4[0] = msg1(w4[-4], w4[-3])
    MY_sha256msg1 (w_regs + ((k + pre1) mod 4)), (w_regs + ((k + pre1 - 3) mod 4))
    endif

    MY_sha256rnds2 state1_N, state0_N

    if (k GE (4 - pre2)) AND (k LT (16 - pre2))
    movdqa  tmp, @CatStr(xmm, %(w_regs + ((k + pre2 - 1) mod 4)))
    palignr tmp, @CatStr(xmm, %(w_regs + ((k + pre2 - 2) mod 4))), 4
    paddd   @CatStr(xmm, %(w_regs + ((k + pre2) mod 4))), tmp
    ; w4[0] = msg2(w4[0], w4[-1])
    MY_sha256msg2 %(w_regs + ((k + pre2) mod 4)), %(w_regs + ((k + pre2 - 1) mod 4))
    endif
endm
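
; C sketch of one RND4 group with pre1 = 3 and pre2 = 2 substituted and both
; schedule branches active (illustration only; <immintrin.h> assumed, K viewed
; as const __m128i K[16], w[] standing for xmm4-xmm7):
;
;   __m128i msg = _mm_add_epi32(K[k], w[k % 4]);          // K[i] + W[i]
;   state0 = _mm_sha256rnds2_epu32(state0, state1, msg);  // rounds 4k, 4k+1
;   msg = _mm_shuffle_epi32(msg, 0x0E);     // move upper K+W pair to low half
;   w[(k + 3) % 4] = _mm_sha256msg1_epu32(w[(k + 3) % 4], w[k % 4]);
;   state1 = _mm_sha256rnds2_epu32(state1, state0, msg);  // rounds 4k+2, 4k+3
;   __m128i t = _mm_alignr_epi8(w[(k + 1) % 4], w[k % 4], 4);
;   w[(k + 2) % 4] = _mm_sha256msg2_epu32(
;       _mm_add_epi32(w[(k + 2) % 4], t), w[(k + 1) % 4]);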



REVERSE_STATE macro
    ; state0 ; dcba
    ; state1 ; hgfe
    pshufd     tmp, state0, 01bH      ; abcd
    pshufd     state0, state1, 01bH   ; efgh
    movdqa     state1, state0         ; efgh
    punpcklqdq state0, tmp            ; cdgh
    punpckhqdq state1, tmp            ; abef
endm
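
; sha256rnds2 wants the state packed as {c,d,g,h} / {a,b,e,f}; this shuffle
; converts from the linear order of state[8] and is its own inverse, which is
; why the same macro runs both before and after the block loop.
; C sketch (illustration only, <immintrin.h> assumed):
;
;   __m128i tmp = _mm_shuffle_epi32(state0, 0x1B);   // abcd
;   state0 = _mm_shuffle_epi32(state1, 0x1B);        // efgh
;   state1 = _mm_unpackhi_epi64(state0, tmp);        // abef
;   state0 = _mm_unpacklo_epi64(state0, tmp);        // cdgh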


MY_PROC Sha256_UpdateBlocks_HW, 3
    MY_PROLOG

    cmp     rNum, 0
    je      end_c

    movdqu  state0, [rState]          ; dcba
    movdqu  state1, [rState + 16]     ; hgfe

    REVERSE_STATE

    ifdef x64
    LOAD_MASK
    endif

    align 16
    nextBlock:
        movdqa  state0_save, state0
        movdqa  state1_save, state1

        ifndef x64
        LOAD_MASK
        endif

        LOAD_W 0
        LOAD_W 1
        LOAD_W 2
        LOAD_W 3

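        ; 16 unrolled groups of 4 rounds each = all 64 SHA-256 rounds per block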
        k = 0
        rept 16
            RND4 k
            k = k + 1
        endm

        paddd   state0, state0_save
        paddd   state1, state1_save

        add     rData, 64
        sub     rNum, 1
        jnz     nextBlock

    REVERSE_STATE

    movdqu  [rState], state0
    movdqu  [rState + 16], state1

end_c:
MY_EPILOG
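
; From C the routine might be declared and used as follows, as a sketch (the
; real prototype and dispatch live in 7-Zip's Sha256.c; UInt32/Byte are
; 7-Zip's usual typedefs, and the caller is expected to verify SHA-NI support
; via CPUID before selecting this path):
;
;   void MY_FAST_CALL Sha256_UpdateBlocks_HW(UInt32 state[8],
;       const Byte *data, size_t numBlocks);
;
;   // state = {a,b,c,d,e,f,g,h}; data points to numBlocks * 64 bytes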

; _TEXT$SHA256OPT ENDS

end