diff options
Diffstat (limited to 'Asm/x86/XzCrc64Opt.asm')
-rw-r--r-- | Asm/x86/XzCrc64Opt.asm | 239 |
1 files changed, 239 insertions, 0 deletions
diff --git a/Asm/x86/XzCrc64Opt.asm b/Asm/x86/XzCrc64Opt.asm new file mode 100644 index 0000000..ad22cc2 --- /dev/null +++ b/Asm/x86/XzCrc64Opt.asm | |||
@@ -0,0 +1,239 @@ | |||
1 | ; XzCrc64Opt.asm -- CRC64 calculation : optimized version | ||
2 | ; 2021-02-06 : Igor Pavlov : Public domain | ||
3 | |||
4 | include 7zAsm.asm | ||
5 | |||
6 | MY_ASM_START | ||
7 | |||
8 | ifdef x64 | ||
9 | |||
; Register roles for the x64 build. The r<N> names come from 7zAsm.asm
; (included above, not visible in this file).
;   rD      - data pointer on entry; MY_PROLOG later turns it into a negative
;             byte offset (relative to rN) that the main loop counts up to 0
;   rN      - byte count on entry and in the byte-wise loops; holds a base
;             address while the main loop runs
;   rT      - base address of the lookup tables
;   num_VAR - keeps the end-of-buffer address across the main loop
10 | rD equ r9 | ||
11 | rN equ r10 | ||
12 | rT equ r5 | ||
13 | num_VAR equ r8 | ||
14 | |||
; SRCDAT4 is the dword at address rD + rN.
15 | SRCDAT4 equ dword ptr [rD + rN * 1] | ||
16 | |||
; CRC_XOR dest, src, t   (x64)
; XORs dest with the qword at [rT + src*8 + 0800h*t], i.e. entry number `src`
; of sub-table number `t`; each sub-table spans 0800h bytes (256 qword entries).
17 | CRC_XOR macro dest:req, src:req, t:req | ||
18 | xor dest, QWORD PTR [rT + src * 8 + 0800h * t] | ||
19 | endm | ||
20 | |||
; CRC1b   (x64)
; Folds one input byte into the running value held in r0.
; Reads the byte at [rD], advances rD by one and decrements rN.
; Overwrites x3 and x6. `dec rN` is the last instruction, so ZF on exit tells
; the caller whether rN has reached zero.
; NOTE(review): x0_L, x3, x6 and r6 are aliases from 7zAsm.asm; presumably
; x0_L is the low byte of r0 and x6 is the 32-bit view of r6 - confirm there.
21 | CRC1b macro | ||
; x6 = next input byte, zero-extended
22 | movzx x6, BYTE PTR [rD] | ||
23 | inc rD | ||
; x6 = input byte XOR x0_L  -> index into sub-table 0
24 | movzx x3, x0_L | ||
25 | xor x6, x3 | ||
; drop the consumed byte from the running value, then mix in the table entry
26 | shr r0, 8 | ||
27 | CRC_XOR r0, r6, 0 | ||
; one byte consumed; leaves ZF = (rN == 0)
28 | dec rN | ||
29 | endm | ||
30 | |||
; MY_PROLOG crc_end   (x64)
; Function entry sequence: saves registers, loads the arguments, processes
; leading bytes one at a time until rD is 4-byte aligned, then sets up the
; registers for the 4-bytes-per-iteration main loop.
;   crc_end - label to jump to when the byte-wise tail loop must handle all
;             remaining bytes (rN == 0 or rN < 8); the label itself is
;             emitted by MY_EPILOG.
; On fall-through (main loop entry):
;   num_VAR = end-of-buffer address
;   rN      = ((end - 4) AND NOT 3) + 4
;   rD      = negative byte offset, so that [rD + rN - 4] is the current dword
;   r0      = running value with the current dword already XORed in
31 | MY_PROLOG macro crc_end:req | ||
; register save macros are defined in 7zAsm.asm (not visible here)
32 | ifdef ABI_LINUX | ||
33 | MY_PUSH_2_REGS | ||
34 | else | ||
35 | MY_PUSH_4_REGS | ||
36 | endif | ||
; NOTE(review): argument meaning (0 = crc, 1 = data, 2 = size, 3 = table) is
; inferred from the register alias names and the x86 stack-layout comment
; further down in this file - confirm against the C prototype.
37 | mov r0, REG_ABI_PARAM_0 | ||
38 | mov rN, REG_ABI_PARAM_2 | ||
39 | mov rT, REG_ABI_PARAM_3 | ||
40 | mov rD, REG_ABI_PARAM_1 | ||
; nothing to do for an empty buffer
41 | test rN, rN | ||
42 | jz crc_end | ||
; byte-wise loop until rD is a multiple of 4
43 | @@: | ||
44 | test rD, 3 | ||
45 | jz @F | ||
46 | CRC1b | ||
; ZF comes from `dec rN` inside CRC1b: keep going while bytes remain
47 | jnz @B | ||
48 | @@: | ||
; fewer than 8 bytes left: let the tail loop in MY_EPILOG handle them
49 | cmp rN, 8 | ||
50 | jb crc_end | ||
; rN = end-of-buffer address, remembered in num_VAR
51 | add rN, rD | ||
52 | mov num_VAR, rN | ||
; rN = last 4-byte-aligned address that still has 4 bytes before the end
53 | sub rN, 4 | ||
54 | and rN, NOT 3 | ||
; rD becomes a negative offset relative to rN
55 | sub rD, rN | ||
; XOR the first dword into the running value ahead of the main loop
56 | mov x1, SRCDAT4 | ||
57 | xor r0, r1 | ||
; from here on SRCDAT4 addresses the dword after the current one
58 | add rN, 4 | ||
59 | endm | ||
60 | |||
; MY_EPILOG crc_end   (x64)
; Runs after the main loop: removes the dword that was XORed into r0 in
; advance but not processed, converts rD / rN back to pointer / byte count,
; processes the remaining bytes one at a time, and restores saved registers.
;   crc_end - name of the label emitted at the start of the byte-wise tail
;             loop; MY_PROLOG jumps here for short inputs.
61 | MY_EPILOG macro crc_end:req | ||
; undo the `add rN, 4` from MY_PROLOG, then XOR the same dword again to
; cancel the look-ahead XOR done by the last main-loop iteration
62 | sub rN, 4 | ||
63 | mov x1, SRCDAT4 | ||
64 | xor r0, r1 | ||
; rD = address of the first unprocessed byte (the main loop exits with rD = 0,
; so that address is the value in rN); rN = end - rD = bytes still to process
65 | mov rD, rN | ||
66 | mov rN, num_VAR | ||
67 | sub rN, rD | ||
; byte-wise tail loop
68 | crc_end: | ||
69 | test rN, rN | ||
70 | jz @F | ||
71 | CRC1b | ||
72 | jmp crc_end | ||
73 | @@: | ||
; restore macros mirror the push macros used in MY_PROLOG
74 | ifdef ABI_LINUX | ||
75 | MY_POP_2_REGS | ||
76 | else | ||
77 | MY_POP_4_REGS | ||
78 | endif | ||
79 | endm | ||
80 | |||
; XzCrc64UpdateT4   (x64)
; CRC64 update that consumes 4 input bytes per main-loop iteration using four
; lookup sub-tables (CRC_XOR with t = 3, 2, 1, 0). Unaligned leading bytes and
; the trailing bytes are handled byte-wise by MY_PROLOG / MY_EPILOG.
; The running value is kept in r0.
; NOTE(review): MY_PROC / MY_ENDP come from 7zAsm.asm; the second MY_PROC
; argument is presumably the parameter count, and r0 presumably is the return
; register - confirm there.
81 | MY_PROC XzCrc64UpdateT4, 4 | ||
82 | MY_PROLOG crc_end_4 | ||
83 | align 16 | ||
; Loop invariant: r0 already has the current input dword XORed in, and
; SRCDAT4 addresses the dword after it.
84 | main_loop_4: | ||
; x1 = next dword, fetched early; it is mixed into the result at the end
85 | mov x1, SRCDAT4 | ||
; pull four index bytes out of r0, shifting r0 right by 32 bits in total
; NOTE(review): presumably x0_L / x0_H are bits 0-7 / 8-15 of r0 - confirm
86 | movzx x2, x0_L | ||
87 | movzx x3, x0_H | ||
88 | shr r0, 16 | ||
89 | movzx x6, x0_L | ||
90 | movzx x7, x0_H | ||
91 | shr r0, 16 | ||
; the four lookups are split across two accumulators (r1 and r0) ...
92 | CRC_XOR r1, r2, 3 | ||
93 | CRC_XOR r0, r3, 2 | ||
94 | CRC_XOR r1, r6, 1 | ||
95 | CRC_XOR r0, r7, 0 | ||
; ... and combined here; r1 also carries the next dword
96 | xor r0, r1 | ||
97 | |||
; rD is a negative offset counting up; ZF is set when it reaches 0
98 | add rD, 4 | ||
99 | jnz main_loop_4 | ||
100 | |||
101 | MY_EPILOG crc_end_4 | ||
102 | MY_ENDP | ||
103 | |||
104 | else | ||
105 | ; x86 (32-bit) | ||
106 | |||
; Register roles and stack offsets for the 32-bit build. The r<N> names,
; REG_SIZE, IS_CDECL and IS_LINUX come from 7zAsm.asm (not visible here).
;   rD - data pointer on entry; later a negative byte offset relative to rN
;   rN - byte count on entry and in the byte-wise loops; base address in the
;        main loop
;   rT - base address of the lookup tables
107 | rD equ r1 | ||
108 | rN equ r7 | ||
109 | rT equ r5 | ||
110 | |||
; Offset of the crc argument from r4 once MY_PROLOG has pushed its registers.
; NOTE(review): presumably 4 pushed registers + the return address - confirm
; against MY_PUSH_4_REGS.
111 | crc_OFFS equ (REG_SIZE * 5) | ||
112 | |||
113 | if (IS_CDECL gt 0) or (IS_LINUX gt 0) | ||
114 | ; cdecl or (GNU fastcall) stack: | ||
115 | ; (UInt32 *) table | ||
116 | ; size_t size | ||
117 | ; void * data | ||
118 | ; (UInt64) crc | ||
119 | ; ret-ip <-(r4) | ||
; crc occupies 8 bytes, so data follows at crc_OFFS + 8
120 | data_OFFS equ (8 + crc_OFFS) | ||
121 | size_OFFS equ (REG_SIZE + data_OFFS) | ||
122 | table_OFFS equ (REG_SIZE + size_OFFS) | ||
; num_VAR reuses the stack slot of the size argument: MY_PROLOG stores the
; end-of-buffer address there
123 | num_VAR equ [r4 + size_OFFS] | ||
124 | table_VAR equ [r4 + table_OFFS] | ||
125 | else | ||
126 | ; Windows fastcall: | ||
127 | ; r1 = data, r2 = size | ||
128 | ; stack: | ||
129 | ; (UInt32 *) table | ||
130 | ; (UInt64) crc | ||
131 | ; ret-ip <-(r4) | ||
132 | table_OFFS equ (8 + crc_OFFS) | ||
133 | table_VAR equ [r4 + table_OFFS] | ||
; No size slot on the stack here, so num_VAR reuses the table slot. This is
; safe only because MY_PROLOG loads rT from table_VAR before it writes num_VAR.
134 | num_VAR equ table_VAR | ||
135 | endif | ||
136 | |||
; SRCDAT4 is the dword at address rD + rN.
137 | SRCDAT4 equ dword ptr [rD + rN * 1] | ||
138 | |||
; CRC op0, op1, dest0, dest1, src, t   (x86)
; Applies a 64-bit table entry to a register pair as two 32-bit operations:
;   op0 dest0, <dword at offset 0 of the entry>
;   op1 dest1, <dword at offset 4 of the entry>
; The entry is number `src` of sub-table `t`; entries are 8 bytes and each
; sub-table spans 0800h bytes (256 entries).
139 | CRC macro op0:req, op1:req, dest0:req, dest1:req, src:req, t:req | ||
140 | op0 dest0, DWORD PTR [rT + src * 8 + 0800h * t] | ||
141 | op1 dest1, DWORD PTR [rT + src * 8 + 0800h * t + 4] | ||
142 | endm | ||
143 | |||
; CRC_XOR dest0, dest1, src, t   (x86)
; XORs both 32-bit halves of table entry `src` of sub-table `t` into the
; register pair dest0 (offset 0 half) / dest1 (offset 4 half).
144 | CRC_XOR macro dest0:req, dest1:req, src:req, t:req | ||
145 | CRC xor, xor, dest0, dest1, src, t | ||
146 | endm | ||
147 | |||
148 | |||
; CRC1b   (x86)
; Folds one input byte into the 64-bit running value held in the pair
; r2:r0 (r0 = low half, r2 = high half, as loaded by MY_PROLOG).
; Reads the byte at [rD], advances rD by one and decrements rN.
; Overwrites x3 and x6. `dec rN` is the last instruction, so ZF on exit tells
; the caller whether rN has reached zero.
; NOTE(review): x0_L, x3, x6 and r6 are aliases from 7zAsm.asm; presumably
; x0_L is the low byte of r0 - confirm there.
149 | CRC1b macro | ||
; x6 = next input byte, zero-extended
150 | movzx x6, BYTE PTR [rD] | ||
151 | inc rD | ||
; x6 = input byte XOR x0_L  -> index into sub-table 0
152 | movzx x3, x0_L | ||
153 | xor x6, x3 | ||
; 64-bit right shift by 8 of r2:r0 (shrd moves the low byte of r2 into r0)
154 | shrd r0, r2, 8 | ||
155 | shr r2, 8 | ||
156 | CRC_XOR r0, r2, r6, 0 | ||
; one byte consumed; leaves ZF = (rN == 0)
157 | dec rN | ||
158 | endm | ||
159 | |||
; MY_PROLOG crc_end   (x86)
; Function entry sequence: saves registers, loads the arguments, processes
; leading bytes one at a time until rD is 4-byte aligned, then sets up the
; registers for the 4-bytes-per-iteration main loop.
;   crc_end - label to jump to when the byte-wise tail loop must handle all
;             remaining bytes (rN == 0 or rN < 8); the label itself is
;             emitted by MY_EPILOG.
; On fall-through (main loop entry):
;   num_VAR = end-of-buffer address (stored in a reused argument stack slot)
;   rN      = ((end - 4) AND NOT 3) + 4
;   rD      = negative byte offset, so that [rD + rN - 4] is the current dword
;   r2:r0   = running value with the current dword already XORed into r0
160 | MY_PROLOG macro crc_end:req | ||
161 | MY_PUSH_4_REGS | ||
162 | |||
163 | if (IS_CDECL gt 0) or (IS_LINUX gt 0) | ||
; NOTE(review): proc_numParams is an assembly-time variable defined outside
; this file; presumably MY_ENDP uses it for stack cleanup - confirm.
164 | proc_numParams = proc_numParams + 2 ; for ABI_LINUX | ||
; all arguments are on the stack in this calling convention
165 | mov rN, [r4 + size_OFFS] | ||
166 | mov rD, [r4 + data_OFFS] | ||
167 | else | ||
; Windows fastcall: data is already in r1 (= rD); size arrives in r2 and must
; be copied out before x2 is overwritten with the high crc half below
168 | mov rN, r2 | ||
169 | endif | ||
170 | |||
; load the 64-bit crc argument: low half -> x0, high half -> x2
171 | mov x0, [r4 + crc_OFFS] | ||
172 | mov x2, [r4 + crc_OFFS + 4] | ||
; must be read before num_VAR is written (num_VAR may alias table_VAR)
173 | mov rT, table_VAR | ||
; nothing to do for an empty buffer
174 | test rN, rN | ||
175 | jz crc_end | ||
; byte-wise loop until rD is a multiple of 4
176 | @@: | ||
177 | test rD, 3 | ||
178 | jz @F | ||
179 | CRC1b | ||
; ZF comes from `dec rN` inside CRC1b: keep going while bytes remain
180 | jnz @B | ||
181 | @@: | ||
; fewer than 8 bytes left: let the tail loop in MY_EPILOG handle them
182 | cmp rN, 8 | ||
183 | jb crc_end | ||
; rN = end-of-buffer address, remembered in num_VAR
184 | add rN, rD | ||
185 | |||
186 | mov num_VAR, rN | ||
187 | |||
; rN = last 4-byte-aligned address that still has 4 bytes before the end
188 | sub rN, 4 | ||
189 | and rN, NOT 3 | ||
; rD becomes a negative offset relative to rN
190 | sub rD, rN | ||
; XOR the first dword into the low half ahead of the main loop
191 | xor r0, SRCDAT4 | ||
; from here on SRCDAT4 addresses the dword after the current one
192 | add rN, 4 | ||
193 | endm | ||
194 | |||
; MY_EPILOG crc_end   (x86)
; Runs after the main loop: removes the dword that was XORed into r0 in
; advance but not processed, converts rD / rN back to pointer / byte count,
; processes the remaining bytes one at a time, and restores saved registers.
;   crc_end - name of the label emitted at the start of the byte-wise tail
;             loop; MY_PROLOG jumps here for short inputs.
195 | MY_EPILOG macro crc_end:req | ||
; undo the `add rN, 4` from MY_PROLOG, then XOR the same dword again to
; cancel the look-ahead XOR done by the last main-loop iteration
196 | sub rN, 4 | ||
197 | xor r0, SRCDAT4 | ||
198 | |||
; rD = address of the first unprocessed byte (the main loop exits with rD = 0,
; so that address is the value in rN); rN = end - rD = bytes still to process
199 | mov rD, rN | ||
200 | mov rN, num_VAR | ||
201 | sub rN, rD | ||
; byte-wise tail loop
202 | crc_end: | ||
203 | test rN, rN | ||
204 | jz @F | ||
205 | CRC1b | ||
206 | jmp crc_end | ||
207 | @@: | ||
208 | MY_POP_4_REGS | ||
209 | endm | ||
210 | |||
; XzCrc64UpdateT4   (x86, 32-bit)
; CRC64 update that consumes 4 input bytes per main-loop iteration using four
; lookup sub-tables (t = 3, 2, 1, 0). Unaligned leading bytes and the trailing
; bytes are handled byte-wise by MY_PROLOG / MY_EPILOG.
; The 64-bit running value is kept in r2:r0 (r0 = low half, r2 = high half);
; inside the loop the new low half is built in r3.
; NOTE(review): MY_PROC / MY_ENDP come from 7zAsm.asm; the second MY_PROC
; argument is presumably the parameter slot count - confirm there.
211 | MY_PROC XzCrc64UpdateT4, 5 | ||
212 | MY_PROLOG crc_end_4 | ||
; x6 = first index byte (x0_L) for the first iteration's sub-table 3 lookup
213 | movzx x6, x0_L | ||
214 | align 16 | ||
; Loop invariant: r0 already has the current input dword XORed in, x6 holds
; x0_L, and SRCDAT4 addresses the dword after the current one.
215 | main_loop_4: | ||
; r3 = next dword XOR old high half; this seeds the new low half
216 | mov r3, SRCDAT4 | ||
217 | xor r3, r2 | ||
218 | |||
; sub-table 3: XOR into the new low half; `mov` starts the new high half,
; since the old r2 has just been folded into r3
219 | CRC xor, mov, r3, r2, r6, 3 | ||
; second index byte (x0_H), then shift r0 so the upper two bytes come down
220 | movzx x6, x0_H | ||
221 | shr r0, 16 | ||
222 | CRC_XOR r3, r2, r6, 2 | ||
223 | |||
; third index byte into x6, fourth into x0 (r0 is no longer needed after this)
224 | movzx x6, x0_L | ||
225 | movzx x0, x0_H | ||
226 | CRC_XOR r3, r2, r6, 1 | ||
227 | CRC_XOR r3, r2, r0, 0 | ||
; pre-compute the first index byte for the next iteration from the new low half
228 | movzx x6, x3_L | ||
229 | mov r0, r3 | ||
230 | |||
; rD is a negative offset counting up; ZF is set when it reaches 0
231 | add rD, 4 | ||
232 | jnz main_loop_4 | ||
233 | |||
234 | MY_EPILOG crc_end_4 | ||
235 | MY_ENDP | ||
236 | |||
237 | endif ; ! x64 | ||
238 | |||
239 | end | ||