From a7a1d4a241492e81f659a920f7379c193593ebc6 Mon Sep 17 00:00:00 2001
From: Igor Pavlov <87184205+ip7z@users.noreply.github.com>
Date: Wed, 19 Jun 2024 00:00:00 +0000
Subject: 24.07

---
 Asm/x86/LzFindOpt.asm  | 31 +++++++++++++++++++++++++++++--
 Asm/x86/LzmaDecOpt.asm | 40 ++++++++++++++++++++++++++++++++++++++--
 Asm/x86/Sha1Opt.asm    |  4 ++--
 Asm/x86/Sha256Opt.asm  |  4 ++--
 4 files changed, 71 insertions(+), 8 deletions(-)

(limited to 'Asm')

diff --git a/Asm/x86/LzFindOpt.asm b/Asm/x86/LzFindOpt.asm
index 42e10bd..94c5c76 100644
--- a/Asm/x86/LzFindOpt.asm
+++ b/Asm/x86/LzFindOpt.asm
@@ -1,5 +1,5 @@
 ; LzFindOpt.asm -- ASM version of GetMatchesSpecN_2() function
-; 2021-07-21: Igor Pavlov : Public domain
+; 2024-06-18: Igor Pavlov : Public domain
 ;
 
 ifndef x64
@@ -11,10 +11,31 @@ include 7zAsm.asm
 
 MY_ASM_START
 
-_TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE'
+ifndef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
+if (IS_LINUX gt 0)
+  Z7_LZ_FIND_OPT_ASM_USE_SEGMENT equ 1
+else
+  Z7_LZ_FIND_OPT_ASM_USE_SEGMENT equ 1
+endif
+endif
 
+ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
+_TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE'
 MY_ALIGN macro num:req
         align  num
+        ; align 16
+endm
+else
+MY_ALIGN macro num:req
+        ; We expect that ".text" is aligned to 16 bytes.
+        ; So we don't need large alignment inside our function.
+        align  16
+endm
+endif
+
+
+MY_ALIGN_16 macro
+        MY_ALIGN 16
 endm
 
 MY_ALIGN_32 macro
@@ -136,7 +157,11 @@ COPY_VAR_64 macro dest_var, src_var
 endm
 
 
+ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
 ; MY_ALIGN_64
+else
+  MY_ALIGN_16
+endif
 MY_PROC GetMatchesSpecN_2, 13
 MY_PUSH_PRESERVED_ABI_REGS
         mov     r0, RSP
@@ -508,6 +533,8 @@ fin:
         MY_POP_PRESERVED_ABI_REGS
 MY_ENDP
 
+ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
 _TEXT$LZFINDOPT ENDS
+endif
 
         end
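The LzFindOpt.asm hunks above wrap the dedicated _TEXT$LZFINDOPT segment in a Z7_LZ_FIND_OPT_ASM_USE_SEGMENT switch: with the symbol defined, GetMatchesSpecN_2 sits in its own 64-byte-aligned 'CODE' segment and MY_ALIGN may request large alignments; without it, the code stays in the default code section and MY_ALIGN is capped at "align 16", since requesting more than the section's own alignment is not allowed. The minimal MASM-style sketch below shows the same pattern in isolation; it is not 7-Zip code, and USE_SEG, _TEXT$DEMO and demo_proc are placeholder names standing in for the real Z7_* symbol, segment and procedure.

; Standalone sketch (ml64/uasm-style syntax assumed; placeholder names)

; USE_SEG equ 1                       ; uncomment to use a dedicated 64-byte-aligned segment

ifdef USE_SEG
_TEXT$DEMO SEGMENT ALIGN(64) 'CODE'   ; own segment: align requests up to 64 are honored
MY_ALIGN macro num:req
        align  num
endm
else
.code                                 ; default code section (16-byte alignment expected)
MY_ALIGN macro num:req
        align  16                     ; never request more than the section's alignment
endm
endif

MY_ALIGN 64                           ; expands to "align 64" or "align 16"
demo_proc PROC
        ret
demo_proc ENDP

ifdef USE_SEG
_TEXT$DEMO ENDS
endif
        end

Either way the procedure's code is identical; only the section it lands in and how strictly it is aligned change.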
diff --git a/Asm/x86/LzmaDecOpt.asm b/Asm/x86/LzmaDecOpt.asm
index f2818e7..7c568df 100644
--- a/Asm/x86/LzmaDecOpt.asm
+++ b/Asm/x86/LzmaDecOpt.asm
@@ -1,5 +1,5 @@
 ; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function
-; 2021-02-23: Igor Pavlov : Public domain
+; 2024-06-18: Igor Pavlov : Public domain
 ;
 ; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()
 ;     function for check at link time.
@@ -17,11 +17,41 @@ include 7zAsm.asm
 
 MY_ASM_START
 
-_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
+; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is defined, we use an additional SEGMENT with 64-byte alignment.
+; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is not defined, we use the default SEGMENT (where the default 16-byte segment alignment is expected).
+; The performance is almost identical in our tests.
+; But the performance can depend on the position of the lzmadec code inside the instruction cache
+; or micro-op cache (depending on the low address bits in 32-byte/64-byte cache lines).
+; And 64-byte alignment provides a more consistent speed regardless
+; of the code's position in the executable.
+; It's also possible that code without Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT can be
+; slightly faster than the 64-byte aligned code in some cases, if the offset of the lzmadec
+; code within a 64-byte block after compilation happens to give better speed for some reason.
+; Note that Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT adds an extra section to the ELF file.
+; If you don't want that extra section, do not define Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT.
+
+ifndef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
+if (IS_LINUX gt 0)
+  Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1
+else
+  Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1
+endif
+endif
 
+ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
+_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
 MY_ALIGN macro num:req
         align  num
+        ; align 16
 endm
+else
+MY_ALIGN macro num:req
+        ; We expect that ".text" is aligned to 16 bytes.
+        ; So we don't need large alignment inside our function.
+        align  16
+endm
+endif
+
 
 MY_ALIGN_16 macro
         MY_ALIGN 16
@@ -610,7 +640,11 @@ PARAM_lzma      equ  REG_ABI_PARAM_0
 PARAM_limit     equ  REG_ABI_PARAM_1
 PARAM_bufLimit  equ  REG_ABI_PARAM_2
 
+ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
 ; MY_ALIGN_64
+else
+  MY_ALIGN_16
+endif
 MY_PROC LzmaDec_DecodeReal_3, 3
 MY_PUSH_PRESERVED_ABI_REGS
 
@@ -1298,6 +1332,8 @@ fin:
         MY_POP_PRESERVED_ABI_REGS
 MY_ENDP
 
+ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
 _TEXT$LZMADECOPT ENDS
+endif
 
         end
diff --git a/Asm/x86/Sha1Opt.asm b/Asm/x86/Sha1Opt.asm
index 3495fd1..0b63aeb 100644
--- a/Asm/x86/Sha1Opt.asm
+++ b/Asm/x86/Sha1Opt.asm
@@ -1,5 +1,5 @@
 ; Sha1Opt.asm -- SHA-1 optimized code for SHA-1 x86 hardware instructions
-; 2021-03-10 : Igor Pavlov : Public domain
+; 2024-06-16 : Igor Pavlov : Public domain
 
 include 7zAsm.asm
 
@@ -20,7 +20,7 @@ MY_ASM_START
 
 
 
-CONST SEGMENT
+CONST SEGMENT READONLY
 
 align 16
 Reverse_Endian_Mask db 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0
diff --git a/Asm/x86/Sha256Opt.asm b/Asm/x86/Sha256Opt.asm
index 3e9f6ed..bc2f9da 100644
--- a/Asm/x86/Sha256Opt.asm
+++ b/Asm/x86/Sha256Opt.asm
@@ -1,5 +1,5 @@
 ; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions
-; 2022-04-17 : Igor Pavlov : Public domain
+; 2024-06-16 : Igor Pavlov : Public domain
 
 include 7zAsm.asm
 
@@ -20,7 +20,7 @@ endif
 EXTRN   K_CONST:xmmword
 @
 
-CONST SEGMENT
+CONST SEGMENT READONLY
 
 align 16
 Reverse_Endian_Mask db 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12
-- 
cgit v1.2.3-55-g6feb
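The Sha1Opt.asm and Sha256Opt.asm hunks only add the READONLY attribute to the existing CONST segment that holds the byte-shuffle masks: the assembler then rejects code that writes directly into that segment, and the segment can be placed in a read-only section of the output file. A minimal standalone sketch of the idea follows (MASM-style syntax assumed; my_mask and load_mask are placeholder names, not 7-Zip symbols).

; Standalone sketch (placeholder names)

CONST SEGMENT READONLY                   ; non-writable constant data
        align 16
my_mask db 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0
CONST ENDS

.code
load_mask PROC
        lea     rax, my_mask             ; reading the table is fine
        movdqa  xmm0, xmmword ptr [rax]  ; 16-byte aligned load of the mask
        ; mov byte ptr [rax], 0          ; a store would fault at run time if the
                                         ; section is mapped read-only
        ret
load_mask ENDP
        end

How strictly the protection is enforced at run time depends on the object format and the linker.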