diff options
Diffstat (limited to 'Asm')
-rw-r--r-- | Asm/x86/LzFindOpt.asm | 31 | ||||
-rw-r--r-- | Asm/x86/LzmaDecOpt.asm | 40 | ||||
-rw-r--r-- | Asm/x86/Sha1Opt.asm | 4 | ||||
-rw-r--r-- | Asm/x86/Sha256Opt.asm | 4 |
4 files changed, 71 insertions, 8 deletions
diff --git a/Asm/x86/LzFindOpt.asm b/Asm/x86/LzFindOpt.asm index 42e10bd..94c5c76 100644 --- a/Asm/x86/LzFindOpt.asm +++ b/Asm/x86/LzFindOpt.asm | |||
@@ -1,5 +1,5 @@ | |||
1 | ; LzFindOpt.asm -- ASM version of GetMatchesSpecN_2() function | 1 | ; LzFindOpt.asm -- ASM version of GetMatchesSpecN_2() function |
2 | ; 2021-07-21: Igor Pavlov : Public domain | 2 | ; 2024-06-18: Igor Pavlov : Public domain |
3 | ; | 3 | ; |
4 | 4 | ||
5 | ifndef x64 | 5 | ifndef x64 |
@@ -11,10 +11,31 @@ include 7zAsm.asm | |||
11 | 11 | ||
12 | MY_ASM_START | 12 | MY_ASM_START |
13 | 13 | ||
14 | _TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE' | 14 | ifndef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT |
15 | if (IS_LINUX gt 0) | ||
16 | Z7_LZ_FIND_OPT_ASM_USE_SEGMENT equ 1 | ||
17 | else | ||
18 | Z7_LZ_FIND_OPT_ASM_USE_SEGMENT equ 1 | ||
19 | endif | ||
20 | endif | ||
15 | 21 | ||
22 | ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT | ||
23 | _TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE' | ||
16 | MY_ALIGN macro num:req | 24 | MY_ALIGN macro num:req |
17 | align num | 25 | align num |
26 | ; align 16 | ||
27 | endm | ||
28 | else | ||
29 | MY_ALIGN macro num:req | ||
30 | ; We expect that ".text" is aligned for 16-bytes. | ||
31 | ; So we don't need large alignment inside our function. | ||
32 | align 16 | ||
33 | endm | ||
34 | endif | ||
35 | |||
36 | |||
37 | MY_ALIGN_16 macro | ||
38 | MY_ALIGN 16 | ||
18 | endm | 39 | endm |
19 | 40 | ||
20 | MY_ALIGN_32 macro | 41 | MY_ALIGN_32 macro |
@@ -136,7 +157,11 @@ COPY_VAR_64 macro dest_var, src_var | |||
136 | endm | 157 | endm |
137 | 158 | ||
138 | 159 | ||
160 | ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT | ||
139 | ; MY_ALIGN_64 | 161 | ; MY_ALIGN_64 |
162 | else | ||
163 | MY_ALIGN_16 | ||
164 | endif | ||
140 | MY_PROC GetMatchesSpecN_2, 13 | 165 | MY_PROC GetMatchesSpecN_2, 13 |
141 | MY_PUSH_PRESERVED_ABI_REGS | 166 | MY_PUSH_PRESERVED_ABI_REGS |
142 | mov r0, RSP | 167 | mov r0, RSP |
@@ -508,6 +533,8 @@ fin: | |||
508 | MY_POP_PRESERVED_ABI_REGS | 533 | MY_POP_PRESERVED_ABI_REGS |
509 | MY_ENDP | 534 | MY_ENDP |
510 | 535 | ||
536 | ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT | ||
511 | _TEXT$LZFINDOPT ENDS | 537 | _TEXT$LZFINDOPT ENDS |
538 | endif | ||
512 | 539 | ||
513 | end | 540 | end |
diff --git a/Asm/x86/LzmaDecOpt.asm b/Asm/x86/LzmaDecOpt.asm index f2818e7..7c568df 100644 --- a/Asm/x86/LzmaDecOpt.asm +++ b/Asm/x86/LzmaDecOpt.asm | |||
@@ -1,5 +1,5 @@ | |||
1 | ; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function | 1 | ; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function |
2 | ; 2021-02-23: Igor Pavlov : Public domain | 2 | ; 2024-06-18: Igor Pavlov : Public domain |
3 | ; | 3 | ; |
4 | ; 3 - is the code compatibility version of LzmaDec_DecodeReal_*() | 4 | ; 3 - is the code compatibility version of LzmaDec_DecodeReal_*() |
5 | ; function for check at link time. | 5 | ; function for check at link time. |
@@ -17,11 +17,41 @@ include 7zAsm.asm | |||
17 | 17 | ||
18 | MY_ASM_START | 18 | MY_ASM_START |
19 | 19 | ||
20 | _TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE' | 20 | ; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is defined, we use additional SEGMENT with 64-byte alignment. |
21 | ; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is not defined, we use default SEGMENT (where default 16-byte alignment of segment is expected). | ||
22 | ; The performance is almost identical in our tests. | ||
23 | ; But the performance can depend from position of lzmadec code inside instruction cache | ||
24 | ; or micro-op cache line (depending from low address bits in 32-byte/64-byte cache lines). | ||
25 | ; And 64-byte alignment provides a more consistent speed regardless | ||
26 | ; of the code's position in the executable. | ||
27 | ; But also it's possible that code without Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT can be | ||
28 | ; slightly faster than 64-bytes aligned code in some cases, if offset of lzmadec | ||
29 | ; code in 64-byte block after compilation provides better speed by some reason. | ||
30 | ; Note that Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT adds an extra section to the ELF file. | ||
31 | ; If you don't want to get that extra section, do not define Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT. | ||
32 | |||
33 | ifndef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT | ||
34 | if (IS_LINUX gt 0) | ||
35 | Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1 | ||
36 | else | ||
37 | Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1 | ||
38 | endif | ||
39 | endif | ||
21 | 40 | ||
41 | ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT | ||
42 | _TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE' | ||
22 | MY_ALIGN macro num:req | 43 | MY_ALIGN macro num:req |
23 | align num | 44 | align num |
45 | ; align 16 | ||
24 | endm | 46 | endm |
47 | else | ||
48 | MY_ALIGN macro num:req | ||
49 | ; We expect that ".text" is aligned for 16-bytes. | ||
50 | ; So we don't need large alignment inside our function. | ||
51 | align 16 | ||
52 | endm | ||
53 | endif | ||
54 | |||
25 | 55 | ||
26 | MY_ALIGN_16 macro | 56 | MY_ALIGN_16 macro |
27 | MY_ALIGN 16 | 57 | MY_ALIGN 16 |
@@ -610,7 +640,11 @@ PARAM_lzma equ REG_ABI_PARAM_0 | |||
610 | PARAM_limit equ REG_ABI_PARAM_1 | 640 | PARAM_limit equ REG_ABI_PARAM_1 |
611 | PARAM_bufLimit equ REG_ABI_PARAM_2 | 641 | PARAM_bufLimit equ REG_ABI_PARAM_2 |
612 | 642 | ||
643 | ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT | ||
613 | ; MY_ALIGN_64 | 644 | ; MY_ALIGN_64 |
645 | else | ||
646 | MY_ALIGN_16 | ||
647 | endif | ||
614 | MY_PROC LzmaDec_DecodeReal_3, 3 | 648 | MY_PROC LzmaDec_DecodeReal_3, 3 |
615 | MY_PUSH_PRESERVED_ABI_REGS | 649 | MY_PUSH_PRESERVED_ABI_REGS |
616 | 650 | ||
@@ -1298,6 +1332,8 @@ fin: | |||
1298 | MY_POP_PRESERVED_ABI_REGS | 1332 | MY_POP_PRESERVED_ABI_REGS |
1299 | MY_ENDP | 1333 | MY_ENDP |
1300 | 1334 | ||
1335 | ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT | ||
1301 | _TEXT$LZMADECOPT ENDS | 1336 | _TEXT$LZMADECOPT ENDS |
1337 | endif | ||
1302 | 1338 | ||
1303 | end | 1339 | end |
diff --git a/Asm/x86/Sha1Opt.asm b/Asm/x86/Sha1Opt.asm index 3495fd1..0b63aeb 100644 --- a/Asm/x86/Sha1Opt.asm +++ b/Asm/x86/Sha1Opt.asm | |||
@@ -1,5 +1,5 @@ | |||
1 | ; Sha1Opt.asm -- SHA-1 optimized code for SHA-1 x86 hardware instructions | 1 | ; Sha1Opt.asm -- SHA-1 optimized code for SHA-1 x86 hardware instructions |
2 | ; 2021-03-10 : Igor Pavlov : Public domain | 2 | ; 2024-06-16 : Igor Pavlov : Public domain |
3 | 3 | ||
4 | include 7zAsm.asm | 4 | include 7zAsm.asm |
5 | 5 | ||
@@ -20,7 +20,7 @@ MY_ASM_START | |||
20 | 20 | ||
21 | 21 | ||
22 | 22 | ||
23 | CONST SEGMENT | 23 | CONST SEGMENT READONLY |
24 | 24 | ||
25 | align 16 | 25 | align 16 |
26 | Reverse_Endian_Mask db 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0 | 26 | Reverse_Endian_Mask db 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0 |
diff --git a/Asm/x86/Sha256Opt.asm b/Asm/x86/Sha256Opt.asm index 3e9f6ed..bc2f9da 100644 --- a/Asm/x86/Sha256Opt.asm +++ b/Asm/x86/Sha256Opt.asm | |||
@@ -1,5 +1,5 @@ | |||
1 | ; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions | 1 | ; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions |
2 | ; 2022-04-17 : Igor Pavlov : Public domain | 2 | ; 2024-06-16 : Igor Pavlov : Public domain |
3 | 3 | ||
4 | include 7zAsm.asm | 4 | include 7zAsm.asm |
5 | 5 | ||
@@ -20,7 +20,7 @@ endif | |||
20 | EXTRN K_CONST:xmmword | 20 | EXTRN K_CONST:xmmword |
21 | @ | 21 | @ |
22 | 22 | ||
23 | CONST SEGMENT | 23 | CONST SEGMENT READONLY |
24 | 24 | ||
25 | align 16 | 25 | align 16 |
26 | Reverse_Endian_Mask db 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 | 26 | Reverse_Endian_Mask db 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 |