; ---------------------------------------------------------------------------
; Provenance (recovered from the patch view this text was extracted from):
;   commit  f19f813537c7aea1c20749c914e756b54a9c3cf5
;   From:    Igor Pavlov <87184205+ip7z@users.noreply.github.com>
;   Date:    Mon, 27 Dec 2021 00:00:00 +0000
;   Subject: '21.07'
;   Asm/x86/XzCrc64Opt.asm : new file (index 0000000..ad22cc2), 239 insertions
;   page footer: cgit v1.2.3-55-g6feb
; ---------------------------------------------------------------------------

; XzCrc64Opt.asm -- CRC64 calculation : optimized version
; 2021-02-06 : Igor Pavlov : Public domain

; Syntax: MASM-style (macro/endm, equ, ifdef, anonymous @@ labels).
; All register names used below (r0..r10, x0..x7, x0_L, x0_H, x3_L), the
; REG_ABI_PARAM_n / REG_SIZE / IS_CDECL / IS_LINUX symbols and the
; MY_ASM_START / MY_PROC / MY_ENDP / MY_PUSH_* / MY_POP_* macros are defined
; in 7zAsm.asm, not in this file.
; NOTE(review): the comments below assume the usual 7zAsm.asm meaning
; (rK = full-width register K, xK = its 32-bit view, xK_L / xK_H = its two
; lowest bytes) -- confirm against 7zAsm.asm.

include 7zAsm.asm

MY_ASM_START

ifdef x64

; ---------------------------------------------------------------------------
; x64 version
;
; Register roles:
;   r0      : running 64-bit CRC value (loaded from REG_ABI_PARAM_0)
;   rD      : data pointer in the byte loops; in the main loop it is the
;             (negative) offset of the current position relative to the
;             aligned end address
;   rN      : byte count in the byte loops; in the main loop it is the
;             aligned end address + 4 (see MY_PROLOG)
;   rT      : table base (loaded from REG_ABI_PARAM_3)
;   num_VAR : end pointer (data + size), saved across the main loop
; ---------------------------------------------------------------------------

rD      equ  r9
rN      equ  r10
rT      equ  r5
num_VAR equ  r8

; 32-bit load operand addressed by rD + rN.
SRCDAT4 equ  dword ptr [rD + rN * 1]

; dest ^= 64-bit table entry number `src` of sub-table `t`.
; Entries are 8 bytes; consecutive sub-tables are 0800h bytes apart.
CRC_XOR macro dest:req, src:req, t:req
    xor     dest, QWORD PTR [rT + src * 8 + 0800h * t]
endm

; Consume one data byte:
;   index = *rD ^ x0_L ;  r0 = (r0 >> 8) ^ table[0][index] ;  rD++ ;  rN--
; `dec rN` is the last flag-writing instruction, so ZF after this macro
; tells whether rN reached zero (MY_PROLOG relies on that).
; Clobbers: x3, x6.
CRC1b macro
    movzx   x6, BYTE PTR [rD]
    inc     rD
    movzx   x3, x0_L
    xor     x6, x3
    shr     r0, 8
    CRC_XOR r0, r6, 0
    dec     rN
endm

; Function entry.
;   - saves registers and moves the four ABI parameters into r0 / rD / rN / rT
;   - returns to `crc_end` at once when the size is zero
;   - consumes single bytes until rD is 4-byte aligned (or rN runs out)
;   - jumps to `crc_end` when fewer than 8 bytes remain
;   - otherwise sets up the main loop and pre-xors the first dword into r0
MY_PROLOG macro crc_end:req
  ifdef ABI_LINUX
    MY_PUSH_2_REGS
  else
    MY_PUSH_4_REGS
  endif
    ; Keep this order: rT must be read from REG_ABI_PARAM_3 before rD is
    ; written, and rN must be read from REG_ABI_PARAM_2 before num_VAR is
    ; written further down.
    ; NOTE(review): presumably because those registers can alias (r9 / r8)
    ; under the Windows ABI -- confirm against 7zAsm.asm.
    mov     r0, REG_ABI_PARAM_0
    mov     rN, REG_ABI_PARAM_2
    mov     rT, REG_ABI_PARAM_3
    mov     rD, REG_ABI_PARAM_1
    test    rN, rN
    jz      crc_end
  @@:
    test    rD, 3
    jz      @F
    CRC1b
    jnz     @B                  ; ZF comes from `dec rN` inside CRC1b
  @@:
    cmp     rN, 8
    jb      crc_end
    add     rN, rD              ; rN = end pointer
    mov     num_VAR, rN
    sub     rN, 4
    and     rN, NOT 3           ; rN = (end - 4) rounded down to 4 bytes
    sub     rD, rN              ; rD = offset relative to rN (negative here)
    mov     x1, SRCDAT4         ; first dword of the aligned region
    xor     r0, r1
    add     rN, 4               ; from now on SRCDAT4 is the *next* dword
endm

; Function exit.
;   - cancels the dword that the main loop pre-xored into r0 but did not
;     consume (rD is 0 here, so after `sub rN, 4` SRCDAT4 is that dword)
;   - converts rD / rN back to pointer / remaining byte count
;   - consumes the remaining bytes one at a time (loop at `crc_end`)
;   - restores registers
MY_EPILOG macro crc_end:req
    sub     rN, 4
    mov     x1, SRCDAT4
    xor     r0, r1
    mov     rD, rN
    mov     rN, num_VAR
    sub     rN, rD              ; rN = end - current = bytes left
  crc_end:
    test    rN, rN
    jz      @F
    CRC1b
    jmp     crc_end
  @@:
  ifdef ABI_LINUX
    MY_POP_2_REGS
  else
    MY_POP_4_REGS
  endif
endm

; XzCrc64UpdateT4 (4 parameters).
;   REG_ABI_PARAM_0 = initial CRC value, REG_ABI_PARAM_1 = data pointer,
;   REG_ABI_PARAM_2 = size in bytes,     REG_ABI_PARAM_3 = table pointer.
;   The updated CRC is left in r0.
; The main loop consumes 4 bytes per iteration using sub-tables 3..0.
MY_PROC XzCrc64UpdateT4, 4
    MY_PROLOG crc_end_4
    align 16
  main_loop_4:
    mov     x1, SRCDAT4         ; next dword, xored in at the end of the pass
    movzx   x2, x0_L            ; byte 0 of the current value
    movzx   x3, x0_H            ; byte 1
    shr     r0, 16
    movzx   x6, x0_L            ; byte 2
    movzx   x7, x0_H            ; byte 3
    shr     r0, 16              ; r0 = current value >> 32
    ; two accumulators (r1 and r0) take the four table look-ups
    CRC_XOR r1, r2, 3
    CRC_XOR r0, r3, 2
    CRC_XOR r1, r6, 1
    CRC_XOR r0, r7, 0
    xor     r0, r1

    add     rD, 4
    jnz     main_loop_4         ; loop until the offset in rD reaches 0

    MY_EPILOG crc_end_4
MY_ENDP

else
; x86 (32-bit)

; ---------------------------------------------------------------------------
; Register roles:
;   r0 : low 32 bits of the running CRC value   (from [r4 + crc_OFFS])
;   r2 : high 32 bits of the running CRC value  (from [r4 + crc_OFFS + 4])
;   rD, rN, rT : same meaning as in the x64 version
;   num_VAR    : stack slot that holds the end pointer during the main loop
; ---------------------------------------------------------------------------

rD      equ  r1
rN      equ  r7
rT      equ  r5

; Offset of the crc argument from r4 after MY_PROLOG has pushed its registers.
; NOTE(review): REG_SIZE * 5 presumably = 4 pushed registers + return
; address -- confirm against MY_PUSH_4_REGS in 7zAsm.asm.
crc_OFFS  equ  (REG_SIZE * 5)

if (IS_CDECL gt 0) or (IS_LINUX gt 0)
    ; cdecl or (GNU fastcall) stack:
    ;   (UInt32 *) table
    ;   size_t     size
    ;   void *     data
    ;   (UInt64)   crc
    ;   ret-ip <-(r4)
    data_OFFS   equ  (8 + crc_OFFS)
    size_OFFS   equ  (REG_SIZE + data_OFFS)
    table_OFFS  equ  (REG_SIZE + size_OFFS)
    ; the `size` argument slot is reused to store the end pointer
    num_VAR     equ  [r4 + size_OFFS]
    table_VAR   equ  [r4 + table_OFFS]
else
    ; Windows fastcall:
    ;   r1 = data, r2 = size
    ;   stack:
    ;     (UInt32 *) table
    ;     (UInt64)   crc
    ;     ret-ip <-(r4)
    table_OFFS  equ  (8 + crc_OFFS)
    table_VAR   equ  [r4 + table_OFFS]
    ; the `table` argument slot is reused to store the end pointer
    ; (MY_PROLOG loads rT from it before overwriting it)
    num_VAR     equ  table_VAR
endif

; 32-bit load operand addressed by rD + rN.
SRCDAT4 equ  dword ptr [rD + rN * 1]

; Apply `op0` to dest0 with the low dword and `op1` to dest1 with the high
; dword of table entry number `src` of sub-table `t`.
CRC macro op0:req, op1:req, dest0:req, dest1:req, src:req, t:req
    op0     dest0, DWORD PTR [rT + src * 8 + 0800h * t]
    op1     dest1, DWORD PTR [rT + src * 8 + 0800h * t + 4]
endm

; dest1:dest0 ^= table entry number `src` of sub-table `t`.
CRC_XOR macro dest0:req, dest1:req, src:req, t:req
    CRC xor, xor, dest0, dest1, src, t
endm


; Consume one data byte; the 64-bit value lives in r2:r0.
;   index = *rD ^ x0_L ;  r2:r0 = (r2:r0 >> 8) ^ table[0][index] ;  rD++ ; rN--
; ZF after this macro comes from `dec rN`.
; Clobbers: x3, x6.
CRC1b macro
    movzx   x6, BYTE PTR [rD]
    inc     rD
    movzx   x3, x0_L
    xor     x6, x3
    shrd    r0, r2, 8           ; low half takes 8 bits from the high half
    shr     r2, 8
    CRC_XOR r0, r2, r6, 0
    dec     rN
endm

; Function entry: same steps as the x64 MY_PROLOG, with the arguments taken
; from the stack (and, for Windows fastcall, from r1 / r2).
MY_PROLOG macro crc_end:req
    MY_PUSH_4_REGS

  if (IS_CDECL gt 0) or (IS_LINUX gt 0)
    proc_numParams = proc_numParams + 2 ; for ABI_LINUX
    mov     rN, [r4 + size_OFFS]
    mov     rD, [r4 + data_OFFS]
  else
    ; data is already in r1 (= rD); copy size out of r2 before x2 is
    ; overwritten with the high half of the crc below
    mov     rN, r2
  endif

    mov     x0, [r4 + crc_OFFS]
    mov     x2, [r4 + crc_OFFS + 4]
    mov     rT, table_VAR
    test    rN, rN
    jz      crc_end
  @@:
    test    rD, 3
    jz      @F
    CRC1b
    jnz     @B                  ; ZF comes from `dec rN` inside CRC1b
  @@:
    cmp     rN, 8
    jb      crc_end
    add     rN, rD              ; rN = end pointer

    mov     num_VAR, rN

    sub     rN, 4
    and     rN, NOT 3           ; rN = (end - 4) rounded down to 4 bytes
    sub     rD, rN              ; rD = offset relative to rN (negative here)
    xor     r0, SRCDAT4         ; pre-xor the first dword into the low half
    add     rN, 4               ; from now on SRCDAT4 is the *next* dword
endm

; Function exit: same steps as the x64 MY_EPILOG.
MY_EPILOG macro crc_end:req
    sub     rN, 4
    xor     r0, SRCDAT4         ; cancel the dword that was pre-xored

    mov     rD, rN
    mov     rN, num_VAR
    sub     rN, rD              ; rN = end - current = bytes left
  crc_end:
    test    rN, rN
    jz      @F
    CRC1b
    jmp     crc_end
  @@:
    MY_POP_4_REGS
endm

; XzCrc64UpdateT4, x86 version (declared with 5 parameter slots; the 64-bit
; crc argument occupies two of them). The value is kept in r2:r0.
; The main loop consumes 4 bytes per iteration using sub-tables 3..0; x6
; always enters an iteration holding byte 0 of the current low half.
MY_PROC XzCrc64UpdateT4, 5
    MY_PROLOG crc_end_4
    movzx   x6, x0_L
    align 16
  main_loop_4:
    mov     r3, SRCDAT4         ; next dword
    xor     r3, r2              ; ^ old high half -> new low half accumulator

    CRC xor, mov, r3, r2, r6, 3 ; r3 ^= entry.lo ; r2 = entry.hi  (byte 0)
    movzx   x6, x0_H            ; byte 1
    shr     r0, 16
    CRC_XOR r3, r2, r6, 2

    movzx   x6, x0_L            ; byte 2
    movzx   x0, x0_H            ; byte 3 (r0 itself becomes the index)
    CRC_XOR r3, r2, r6, 1
    CRC_XOR r3, r2, r0, 0
    movzx   x6, x3_L            ; byte 0 of the new low half, for the next pass
    mov     r0, r3

    add     rD, 4
    jnz     main_loop_4         ; loop until the offset in rD reaches 0

    MY_EPILOG crc_end_4
MY_ENDP

endif ; ! x64

end