diff options
author | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2024-06-19 00:00:00 +0000 |
---|---|---|
committer | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2024-06-19 15:32:41 +0500 |
commit | a7a1d4a241492e81f659a920f7379c193593ebc6 (patch) | |
tree | 2ff203d3b43b3f6d18abaac21923f729fdb93e43 /Asm/x86/LzmaDecOpt.asm | |
parent | 89a73b901229c8550c172c9556ff8442ae7ac4b8 (diff) | |
download | 7zip-24.07.tar.gz 7zip-24.07.tar.bz2 7zip-24.07.zip |
24.0724.07
Diffstat (limited to '')
-rw-r--r-- | Asm/x86/LzmaDecOpt.asm | 40 |
1 files changed, 38 insertions, 2 deletions
diff --git a/Asm/x86/LzmaDecOpt.asm b/Asm/x86/LzmaDecOpt.asm index f2818e7..7c568df 100644 --- a/Asm/x86/LzmaDecOpt.asm +++ b/Asm/x86/LzmaDecOpt.asm | |||
@@ -1,5 +1,5 @@ | |||
1 | ; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function | 1 | ; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function |
2 | ; 2021-02-23: Igor Pavlov : Public domain | 2 | ; 2024-06-18: Igor Pavlov : Public domain |
3 | ; | 3 | ; |
4 | ; 3 - is the code compatibility version of LzmaDec_DecodeReal_*() | 4 | ; 3 - is the code compatibility version of LzmaDec_DecodeReal_*() |
5 | ; function for check at link time. | 5 | ; function for check at link time. |
@@ -17,11 +17,41 @@ include 7zAsm.asm | |||
17 | 17 | ||
18 | MY_ASM_START | 18 | MY_ASM_START |
19 | 19 | ||
20 | _TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE' | 20 | ; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is defined, we use additional SEGMENT with 64-byte alignment. |
21 | ; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is not defined, we use default SEGMENT (where default 16-byte alignment of segment is expected). | ||
22 | ; The performance is almost identical in our tests. | ||
23 | ; But the performance can depend on the position of lzmadec code inside instruction cache | ||
24 | ; or micro-op cache line (depending on the low address bits in 32-byte/64-byte cache lines). | ||
25 | ; And 64-byte alignment provides a more consistent speed regardless | ||
26 | ; of the code's position in the executable. | ||
27 | ; But it's also possible that code without Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT can be | ||
28 | ; slightly faster than 64-byte aligned code in some cases, if the offset of lzmadec | ||
29 | ; code in a 64-byte block after compilation provides better speed for some reason. | ||
30 | ; Note that Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT adds an extra section to the ELF file. | ||
31 | ; If you don't want to get that extra section, do not define Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT. | ||
32 | |||
33 | ifndef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT | ||
34 | if (IS_LINUX gt 0) | ||
35 | Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1 | ||
36 | else | ||
37 | Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1 | ||
38 | endif | ||
39 | endif | ||
21 | 40 | ||
41 | ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT | ||
42 | _TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE' | ||
22 | MY_ALIGN macro num:req | 43 | MY_ALIGN macro num:req |
23 | align num | 44 | align num |
45 | ; align 16 | ||
24 | endm | 46 | endm |
47 | else | ||
48 | MY_ALIGN macro num:req | ||
49 | ; We expect that ".text" is aligned to 16 bytes. | ||
50 | ; So we don't need large alignment inside our function. | ||
51 | align 16 | ||
52 | endm | ||
53 | endif | ||
54 | |||
25 | 55 | ||
26 | MY_ALIGN_16 macro | 56 | MY_ALIGN_16 macro |
27 | MY_ALIGN 16 | 57 | MY_ALIGN 16 |
@@ -610,7 +640,11 @@ PARAM_lzma equ REG_ABI_PARAM_0 | |||
610 | PARAM_limit equ REG_ABI_PARAM_1 | 640 | PARAM_limit equ REG_ABI_PARAM_1 |
611 | PARAM_bufLimit equ REG_ABI_PARAM_2 | 641 | PARAM_bufLimit equ REG_ABI_PARAM_2 |
612 | 642 | ||
643 | ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT | ||
613 | ; MY_ALIGN_64 | 644 | ; MY_ALIGN_64 |
645 | else | ||
646 | MY_ALIGN_16 | ||
647 | endif | ||
614 | MY_PROC LzmaDec_DecodeReal_3, 3 | 648 | MY_PROC LzmaDec_DecodeReal_3, 3 |
615 | MY_PUSH_PRESERVED_ABI_REGS | 649 | MY_PUSH_PRESERVED_ABI_REGS |
616 | 650 | ||
@@ -1298,6 +1332,8 @@ fin: | |||
1298 | MY_POP_PRESERVED_ABI_REGS | 1332 | MY_POP_PRESERVED_ABI_REGS |
1299 | MY_ENDP | 1333 | MY_ENDP |
1300 | 1334 | ||
1335 | ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT | ||
1301 | _TEXT$LZMADECOPT ENDS | 1336 | _TEXT$LZMADECOPT ENDS |
1337 | endif | ||
1302 | 1338 | ||
1303 | end | 1339 | end |