; Listing captured from a web repository viewer (navigation text and the
; line-number gutter of the viewer were removed; they are not part of the file).
; path: root/Asm/x86/Sha1Opt.asm
; blob: 0b63aeb85b8abc7a5d23743e741cb5170a1d3fa3 (plain)
; Sha1Opt.asm -- SHA-1 optimized code for SHA-1 x86 hardware instructions
; 2024-06-16 : Igor Pavlov : Public domain

include 7zAsm.asm

MY_ASM_START
















CONST   SEGMENT READONLY

; Reverse_Endian_Mask: 16-byte pshufb control vector.
; Control byte i holds (15 - i), so pshufb with this mask writes source
; byte (15 - i) to destination byte i, i.e. it reverses all 16 bytes of an
; XMM register. LOAD_W applies it to every 16-byte chunk of input data.
; LOAD_MASK reads it with movdqa, which requires the 16-byte alignment
; requested below.
align 16
Reverse_Endian_Mask db 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0






















CONST   ENDS

; _TEXT$SHA1OPT SEGMENT 'CODE'

; 32-bit builds: select the .686 instruction set and enable XMM mnemonics.
ifndef x64
    .686
    .xmm
endif

; rNum: register that holds the third argument of Sha1_UpdateBlocks_HW.
;       The procedure decrements it once per 64-byte block, so it is the
;       number of blocks to process.
;         x64: the third ABI parameter register.
;         x86: r0; MY_PROLOG loads it from the stack.
; LOCAL_SIZE: number of stack bytes reserved by MY_PROLOG.
;         x64 with IS_LINUX eq 0: two 16-byte slots (spill of xmm8 and xmm9).
;         x64 with IS_LINUX ne 0: not defined; MY_PROLOG / MY_EPILOG do not
;                                 reference it in that configuration.
;         x86: one 16-byte slot, used as abcd_save at [r4].
; NOTE(review): REG_ABI_PARAM_*, r0 and IS_LINUX are defined outside this
; file (presumably in 7zAsm.asm) -- confirm their mapping there.
ifdef x64
        rNum    equ REG_ABI_PARAM_2
    if (IS_LINUX eq 0)
        LOCAL_SIZE equ (16 * 2)
    endif
else
        rNum    equ r0
        LOCAL_SIZE equ (16 * 1)
endif

; rState: first argument. The procedure reads and writes 16 bytes at
;         [rState] and one dword at [rState + 16].
; rData:  second argument. Pointer to the input; advanced by 64 per block.
rState equ REG_ABI_PARAM_0
rData  equ REG_ABI_PARAM_1


; MY_sha1rnds4 dstN, srcN, imm8
; Emits SHA1RNDS4 as raw bytes: 0F 3A CC, ModRM, imm8.
;   dstN : NUMBER of the first (destination) XMM register  -> ModRM.reg
;   srcN : NUMBER of the second (source) XMM register      -> ModRM.r/m
;   imm8 : immediate byte of the instruction
; The ModRM byte is built as 0C0h + dstN * 8 + srcN (register-direct form).
; No prefix byte is emitted, so only register numbers 0..7 produce a valid
; encoding.
MY_sha1rnds4 macro dstN, srcN, imm8
        db 0Fh, 3Ah, 0CCh, (0C0h + dstN * 8 + srcN), imm8
endm

; MY_SHA_INSTR opByte, dstN, srcN
; Emits a register-to-register instruction of the 0F 38 opcode map as raw
; bytes: 0F 38, opByte, ModRM.
;   opByte : third opcode byte (one of the cmd_sha1* constants)
;   dstN   : NUMBER of the first XMM operand   -> ModRM.reg
;   srcN   : NUMBER of the second XMM operand  -> ModRM.r/m
; No prefix byte is emitted, so only register numbers 0..7 produce a valid
; encoding.
MY_SHA_INSTR macro opByte, dstN, srcN
        db 0Fh, 38h, opByte, (0C0h + dstN * 8 + srcN)
endm

; Third opcode byte (after 0F 38) of each SHA-1 helper instruction.
; These values are passed as the first argument of MY_SHA_INSTR:
;   0C8h = SHA1NEXTE, 0C9h = SHA1MSG1, 0CAh = SHA1MSG2.
cmd_sha1nexte   equ 0c8H
cmd_sha1msg1    equ 0c9H
cmd_sha1msg2    equ 0caH

; MY_sha1nexte eN, wN
; Emits SHA1NEXTE xmm(eN), xmm(wN) through MY_SHA_INSTR.
; Both arguments are XMM register NUMBERS (0..7), not register names.
; RND4 uses it to derive the next E value from the state copy held in
; xmm(eN) and to add the message dwords (or the saved E) held in xmm(wN).
MY_sha1nexte macro eN, wN
        MY_SHA_INSTR  cmd_sha1nexte, eN, wN
endm

; MY_sha1msg1 dstN, srcN
; Emits SHA1MSG1 xmm(dstN), xmm(srcN) through MY_SHA_INSTR.
; Both arguments are XMM register NUMBERS (0..7), not register names.
; RND4 uses it as the first step of the message-schedule update.
MY_sha1msg1 macro dstN, srcN
        MY_SHA_INSTR  cmd_sha1msg1, dstN, srcN
endm

; MY_sha1msg2 dstN, srcN
; Emits SHA1MSG2 xmm(dstN), xmm(srcN) through MY_SHA_INSTR.
; Both arguments are XMM register NUMBERS (0..7), not register names.
; RND4 uses it as the last step of the message-schedule update.
MY_sha1msg2 macro dstN, srcN
        MY_SHA_INSTR  cmd_sha1msg2, dstN, srcN
endm

; MY_PROLOG: entry sequence of Sha1_UpdateBlocks_HW. MY_EPILOG undoes it.
;
;   x64, IS_LINUX eq 0:
;       Saves xmm6..xmm9, all of which the body overwrites (xmm4..xmm7 are
;       the W registers, xmm8 is abcd_save, xmm9 is mask2).
;       xmm6 / xmm7 are stored at [r4 + 8] and [r4 + 24] BEFORE r4 is moved;
;       xmm8 / xmm9 are stored in the LOCAL_SIZE bytes reserved afterwards.
;       All four stores use movdqa, so they rely on (r4 + 8) being 16-byte
;       aligned at entry; subtracting LOCAL_SIZE + 8 keeps that alignment.
;       NOTE(review): presumably r4 is the stack pointer and [r4 + 8 ..] is
;       the caller-allocated Win64 shadow space above the return address --
;       r4 and IS_LINUX are defined outside this file; confirm there.
;   x64, IS_LINUX ne 0:
;       Emits nothing.
;   x86:
;       Loads the stack-passed arguments (all three for cdecl, only the
;       third one otherwise), saves r5, keeps the entry value of r4 in r5,
;       then aligns r4 down to 16 and reserves LOCAL_SIZE bytes so that
;       [r4] (abcd_save) can be accessed with movdqa.
;       NOTE(review): in the non-cdecl case rState / rData are presumably
;       already in their registers on entry -- confirm in 7zAsm.asm.
MY_PROLOG macro
    ifdef x64
      if (IS_LINUX eq 0)
        ; spill xmm6 / xmm7 above the current r4, before the frame is made
        movdqa  [r4 + 8], xmm6
        movdqa  [r4 + 8 + 16], xmm7
        ; reserve LOCAL_SIZE bytes; the extra 8 keeps r4 16-byte aligned
        sub     r4, LOCAL_SIZE + 8
        movdqa  [r4     ], xmm8
        movdqa  [r4 + 16], xmm9
      endif
    else ; x86
      if (IS_CDECL gt 0)
        ; offsets are relative to r4 at entry (r5 is not pushed yet)
        mov     rState, [r4 + REG_SIZE * 1]
        mov     rData,  [r4 + REG_SIZE * 2]
        mov     rNum,   [r4 + REG_SIZE * 3]
      else ; fastcall
        mov     rNum,   [r4 + REG_SIZE * 1]
      endif
        ; r5 remembers r4 so that MY_EPILOG can undo the alignment
        push    r5
        mov     r5, r4
        and     r4, -16
        sub     r4, LOCAL_SIZE
    endif
endm

; MY_EPILOG: exit sequence of Sha1_UpdateBlocks_HW; exact mirror of MY_PROLOG.
;   x64, IS_LINUX eq 0: reloads xmm8 / xmm9 from the local area, releases
;       the LOCAL_SIZE + 8 bytes, then reloads xmm6 / xmm7 from [r4 + 8] and
;       [r4 + 24], the same addresses MY_PROLOG stored them to.
;   x64, IS_LINUX ne 0: nothing to restore.
;   x86: restores r4 from r5 (dropping the aligned local area) and pops r5.
; The macro finishes with MY_ENDP.
; NOTE(review): MY_ENDP is defined outside this file; presumably it emits
; the return instruction and closes the procedure -- confirm in 7zAsm.asm.
MY_EPILOG macro
    ifdef x64
      if (IS_LINUX eq 0)
        movdqa  xmm8, [r4]
        movdqa  xmm9, [r4 + 16]
        ; back to the r4 value the procedure was entered with
        add     r4, LOCAL_SIZE + 8
        movdqa  xmm6, [r4 + 8]
        movdqa  xmm7, [r4 + 8 + 16]
      endif
    else ; x86
        mov     r4, r5
        pop     r5
    endif
    MY_ENDP
endm


; XMM register assignment, kept as NUMBERS because the raw-encoding macros
; (MY_sha1rnds4, MY_sha1nexte, MY_sha1msg1, MY_sha1msg2) take register
; numbers, not names.
;   e0_N, e1_N : the two registers that alternate as E operand (see RND4)
;   abcd_N     : register holding the A, B, C, D state words
;   e0_save_N  : copy of e0 taken at the start of each block
;   w_regs     : first of four consecutive message registers
;                (LOAD_W 0..3 fills xmm4..xmm7)
e0_N       equ 0
e1_N       equ 1
abcd_N     equ 2
e0_save_N  equ 3
w_regs     equ 4

; Register NAMES built from the numbers above, for use with ordinary
; mnemonics (e0 = xmm0, e1 = xmm1, abcd = xmm2, e0_save = xmm3).
e0      equ @CatStr(xmm, %e0_N)
e1      equ @CatStr(xmm, %e1_N)
abcd    equ @CatStr(xmm, %abcd_N)
e0_save equ @CatStr(xmm, %e0_save_N)


; abcd_save: copy of abcd taken at the start of each block.
; mask2:     register holding Reverse_Endian_Mask for LOAD_W.
;   x64: both get dedicated registers (xmm8, xmm9); MY_PROLOG saves them
;        when IS_LINUX eq 0.
;   x86: abcd_save is the 16-byte aligned stack slot at [r4] created by
;        MY_PROLOG, and mask2 shares the register e1. RND4 overwrites e1,
;        which is why the procedure reloads the mask for every block on x86.
ifdef x64
        abcd_save    equ  xmm8
        mask2        equ  xmm9
else
        abcd_save    equ  [r4]
        mask2        equ  e1
endif

; LOAD_MASK: loads the 16-byte Reverse_Endian_Mask constant into mask2.
; movdqa is an aligned load; the constant is declared after "align 16".
LOAD_MASK macro
        movdqa  mask2, XMMWORD PTR Reverse_Endian_Mask
endm

; LOAD_W idx
; Loads 16-byte chunk number idx of the current block at rData into message
; register xmm(w_regs + idx), then reverses all 16 bytes of that register
; with pshufb and mask2.
;   idx : required; the procedure uses 0..3 (bytes 0..63 of the block).
; mask2 must already contain Reverse_Endian_Mask (see LOAD_MASK).
; The load is unaligned (movdqu), so rData needs no particular alignment.
LOAD_W macro idx:req
        movdqu  @CatStr(xmm, %(w_regs + idx)), [rData + (idx) * 16]
        pshufb  @CatStr(xmm, %(w_regs + idx)), mask2
endm


; Message-schedule look-ahead used by RND4, counted in 4-round groups:
;   pre2 : in group k, the pxor / SHA1MSG2 pair completes the message
;          register that will be consumed by group (k + pre2)
;   pre1 : in group k, SHA1MSG1 starts the message register for group
;          (k + pre1); it is always one group further ahead than pre2
; pre2 can be 2 or 3 (recommended)
pre2 equ 3
pre1 equ (pre2 + 1)

; Number of RND4 expansions per block: 20 groups of 4 rounds = 80 rounds.
NUM_ROUNDS4 equ 20
   
; RND4 k
; Emits 4-round group number k (0 .. NUM_ROUNDS4 - 1) of one SHA-1 block,
; plus the part of the message schedule that is interleaved with it.
;
; E registers: e0 and e1 alternate. Group k consumes xmm(e0_N + k mod 2) as
; the E operand of SHA1RNDS4 and prepares the other one for group k + 1.
;
; W registers: xmm(w_regs + j mod 4) holds the message dwords of group j.
; Once group j has consumed its register, the register is recycled for
; group j + 4.
;
; Steps, in emission order:
;   1. copy abcd (state before this group) into the next E register
;   2. SHA1RNDS4 abcd, current E, k / 5
;      (k / 5 = 0..3: the immediate changes every 5 groups = 20 rounds)
;   3. SHA1NEXTE next E, nextM
;      nextM is the W register of group k + 1; for the last group it is
;      e0_save_N, so the E value saved at block start is added instead
;   4. message schedule for later groups, where enabled by the pre2 / pre1
;      ranges:
;        pxor      W(k + pre2), W(k + pre2 - 2)
;        SHA1MSG1  W(k + pre1), W(k + pre1 - 3)
;        SHA1MSG2  W(k + pre2), W(k + pre2 - 1)
;      The range checks skip groups whose W comes directly from LOAD_W
;      (index below 4) and groups past the last one.
;
; nextM is an assembly-time variable that is redefined on every expansion.
RND4 macro k
        movdqa  @CatStr(xmm, %(e0_N + ((k + 1) mod 2))), abcd
        MY_sha1rnds4 abcd_N, (e0_N + (k mod 2)), k / 5

        nextM = (w_regs + ((k + 1) mod 4))

    if (k EQ NUM_ROUNDS4 - 1)
        nextM = e0_save_N
    endif
        
        MY_sha1nexte (e0_N + ((k + 1) mod 2)), nextM
        
    if (k GE (4 - pre2)) AND (k LT (NUM_ROUNDS4 - pre2))
        pxor @CatStr(xmm, %(w_regs + ((k + pre2) mod 4))), @CatStr(xmm, %(w_regs + ((k + pre2 - 2) mod 4)))
    endif

    if (k GE (4 - pre1)) AND (k LT (NUM_ROUNDS4 - pre1))
        MY_sha1msg1 (w_regs + ((k + pre1) mod 4)), (w_regs + ((k + pre1 - 3) mod 4))
    endif
    
    if (k GE (4 - pre2)) AND (k LT (NUM_ROUNDS4 - pre2))
        MY_sha1msg2 (w_regs + ((k + pre2) mod 4)), (w_regs + ((k + pre2 - 1) mod 4))
    endif
endm


; REVERSE_STATE
; Reverses the order of the four dwords in both state registers.
; The pshufd selector 1Bh (binary 00 01 10 11) picks source dwords 3, 2, 1, 0
; for destination dwords 0, 1, 2, 3.
; The procedure applies it once after loading the state and once before
; storing it; the second application restores the memory layout.
; The layout notes below list dwords from high to low, as loaded from memory.
REVERSE_STATE macro
        pshufd  e0,   e0,   1Bh         ; 000e becomes e000
        pshufd  abcd, abcd, 1Bh         ; dcba becomes abcd
endm





; Sha1_UpdateBlocks_HW(state, data, numBlocks)
; Updates a SHA-1 state with numBlocks consecutive 64-byte blocks using the
; SHA1RNDS4 / SHA1NEXTE / SHA1MSG1 / SHA1MSG2 instructions.
;
;   rState : pointer to the state. 16 bytes at [rState] (four dwords) and
;            one dword at [rState + 16] are read at entry and written back
;            after the last block. Accessed with unaligned moves.
;   rData  : pointer to the input. 64 bytes are read per block with
;            unaligned loads; the register is advanced by 64 per block.
;   rNum   : number of blocks. When it is 0 the state is not accessed at all.
;
; Registers used: xmm0..xmm7 on every target, plus xmm8 / xmm9 on x64.
; MY_PROLOG / MY_EPILOG save and restore xmm6..xmm9 when x64 is built with
; IS_LINUX eq 0.
; NOTE(review): the "3" passed to MY_PROC is presumably the argument count;
; MY_PROC is defined outside this file -- confirm in 7zAsm.asm.
MY_PROC Sha1_UpdateBlocks_HW, 3
    MY_PROLOG

        ; nothing to do for zero blocks
        cmp     rNum, 0
        je      end_c

        movdqu   abcd, [rState]               ; dcba
        movd     e0, dword ptr [rState + 16]  ; 000e

        ; bring the state words into the order used inside the loop
        REVERSE_STATE
       
        ; x64: mask2 has its own register, so one load before the loop is enough
        ifdef x64
        LOAD_MASK
        endif

    align 16
    nextBlock:
        ; keep the incoming state; it is added back after the 80 rounds
        movdqa  abcd_save, abcd
        movdqa  e0_save, e0
        
        ; x86: mask2 shares e1, which RND4 overwrites, so reload per block
        ifndef x64
        LOAD_MASK
        endif
        
        ; read the 64-byte block into xmm4..xmm7, byte-reversing each register
        LOAD_W 0
        LOAD_W 1
        LOAD_W 2
        LOAD_W 3

        ; add the first message register to E for group 0; later groups get
        ; their message words through SHA1NEXTE inside RND4
        paddd   e0, @CatStr(xmm, %(w_regs))
        ; fully unrolled: RND4 is expanded for k = 0 .. NUM_ROUNDS4 - 1
        k = 0
        rept NUM_ROUNDS4
          RND4 k
          k = k + 1
        endm

        ; add the saved A, B, C, D words; the saved E was already added by
        ; the last RND4 (its SHA1NEXTE uses e0_save as second operand)
        paddd   abcd, abcd_save


        add     rData, 64
        sub     rNum, 1
        jnz     nextBlock
        
        ; back to the memory layout of the state before storing it
        REVERSE_STATE

        movdqu  [rState], abcd
        movd    dword ptr [rState + 16], e0
       
  end_c:
MY_EPILOG

; _TEXT$SHA1OPT ENDS

end