path: root/Asm/x86/LzmaDecOpt.asm
author    Igor Pavlov <87184205+ip7z@users.noreply.github.com>  2024-06-19 00:00:00 +0000
committer Igor Pavlov <87184205+ip7z@users.noreply.github.com>  2024-06-19 15:32:41 +0500
commit    a7a1d4a241492e81f659a920f7379c193593ebc6 (patch)
tree      2ff203d3b43b3f6d18abaac21923f729fdb93e43 /Asm/x86/LzmaDecOpt.asm
parent    89a73b901229c8550c172c9556ff8442ae7ac4b8 (diff)
tag       24.07
Diffstat (limited to 'Asm/x86/LzmaDecOpt.asm')
-rw-r--r--  Asm/x86/LzmaDecOpt.asm | 40
1 file changed, 38 insertions(+), 2 deletions(-)
diff --git a/Asm/x86/LzmaDecOpt.asm b/Asm/x86/LzmaDecOpt.asm
index f2818e7..7c568df 100644
--- a/Asm/x86/LzmaDecOpt.asm
+++ b/Asm/x86/LzmaDecOpt.asm
@@ -1,5 +1,5 @@
1; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function 1; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function
2; 2021-02-23: Igor Pavlov : Public domain 2; 2024-06-18: Igor Pavlov : Public domain
3; 3;
4; 3 - is the code compatibility version of LzmaDec_DecodeReal_*() 4; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()
5; function for check at link time. 5; function for check at link time.
@@ -17,11 +17,41 @@ include 7zAsm.asm
 
 MY_ASM_START
 
-_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
+; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is defined, we use an additional SEGMENT with 64-byte alignment.
+; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is not defined, we use the default SEGMENT (where default 16-byte alignment of the segment is expected).
+; The performance is almost identical in our tests.
+; But the performance can depend on the position of the lzmadec code inside the instruction cache
+; or micro-op cache line (depending on the low address bits in 32-byte/64-byte cache lines).
+; 64-byte alignment provides a more consistent speed regardless
+; of the code's position in the executable.
+; But it's also possible that code without Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT can be
+; slightly faster than 64-byte aligned code in some cases, if the offset of the lzmadec
+; code in a 64-byte block after compilation provides better speed for some reason.
+; Note that Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT adds an extra section to the ELF file.
+; If you don't want that extra section, do not define Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT.
+
+ifndef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
+if (IS_LINUX gt 0)
+  Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1
+else
+  Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1
+endif
+endif
 
+ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
+_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
 MY_ALIGN macro num:req
         align   num
+        ; align 16
 endm
+else
+MY_ALIGN macro num:req
+        ; We expect that ".text" is 16-byte aligned.
+        ; So we don't need large alignment inside our function.
+        align   16
+endm
+endif
+
 
 MY_ALIGN_16 macro
         MY_ALIGN 16
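The change follows a standard MASM conditional-assembly pattern: an ifndef guard supplies a default, and ifdef/else/endif then selects between the dedicated 64-byte aligned segment and the default code segment. Note that as committed, both branches of the if (IS_LINUX gt 0) test set the symbol to 1, so the segment variant is the default on every platform; the if/else scaffolding merely leaves room for the Linux default to diverge later. Below is a minimal standalone sketch of the same pattern; USE_SEG, _TEXT$DEMO and demo_proc are hypothetical names, not part of the 7-Zip source, and ml64-style MASM syntax is assumed.

    ; demo.asm -- minimal sketch of the default-define + conditional-segment
    ; pattern used in the patch above (all names hypothetical; ml64 assumed)

    ifndef USE_SEG
    USE_SEG equ 1                         ; default: segment variant enabled
    endif

    ifdef USE_SEG
    _TEXT$DEMO SEGMENT ALIGN(64) 'CODE'   ; dedicated 64-byte aligned segment
    else
    .code                                 ; default code segment (16-byte aligned)
    endif

    demo_proc PROC
            xor     eax, eax
            ret
    demo_proc ENDP

    ifdef USE_SEG
    _TEXT$DEMO ENDS
    endif

    end

With a COFF toolchain the linker merges _TEXT$DEMO-style contributions into the final text section (grouping by the name before the $, sorted by the suffix), while an ELF build keeps it as a separate section, which is the extra section the patch comment warns about.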
@@ -610,7 +640,11 @@ PARAM_lzma equ REG_ABI_PARAM_0
 PARAM_limit equ REG_ABI_PARAM_1
 PARAM_bufLimit equ REG_ABI_PARAM_2
 
+ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
 ; MY_ALIGN_64
+else
+        MY_ALIGN_16
+endif
 MY_PROC LzmaDec_DecodeReal_3, 3
 MY_PUSH_PRESERVED_ABI_REGS
 
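In the non-segment build there is no 64-byte segment start to inherit alignment from, so the patch places MY_ALIGN_16 in front of MY_PROC to pad the location counter before the entry of LzmaDec_DecodeReal_3; in the segment build the commented-out MY_ALIGN_64 is left as documentation, since the segment itself is ALIGN(64). A small sketch of the effect of align before a procedure label (hypothetical label entry16; ml64 assumed):

    ; align_demo.asm -- effect of "align" before a function entry (sketch)

    .code

            xor     eax, eax    ; preceding code leaves the location counter arbitrary
            align   16          ; assembler pads to the boundary (NOP filler in code segments)
    entry16 PROC                ; entry16 now starts on a 16-byte boundary
            mov     eax, 1
            ret
    entry16 ENDP

    end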
@@ -1298,6 +1332,8 @@ fin:
 MY_POP_PRESERVED_ABI_REGS
 MY_ENDP
 
+ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
 _TEXT$LZMADECOPT ENDS
+endif
 
 end