author    Igor Pavlov <87184205+ip7z@users.noreply.github.com>  2024-06-19 00:00:00 +0000
committer Igor Pavlov <87184205+ip7z@users.noreply.github.com>  2024-06-19 15:32:41 +0500
commit    a7a1d4a241492e81f659a920f7379c193593ebc6 (patch)
tree      2ff203d3b43b3f6d18abaac21923f729fdb93e43 /Asm
parent    89a73b901229c8550c172c9556ff8442ae7ac4b8 (diff)
tag       24.07
Diffstat (limited to 'Asm')
 -rw-r--r--  Asm/x86/LzFindOpt.asm   31
 -rw-r--r--  Asm/x86/LzmaDecOpt.asm  40
 -rw-r--r--  Asm/x86/Sha1Opt.asm      4
 -rw-r--r--  Asm/x86/Sha256Opt.asm    4
4 files changed, 71 insertions, 8 deletions
diff --git a/Asm/x86/LzFindOpt.asm b/Asm/x86/LzFindOpt.asm
index 42e10bd..94c5c76 100644
--- a/Asm/x86/LzFindOpt.asm
+++ b/Asm/x86/LzFindOpt.asm
@@ -1,5 +1,5 @@
 ; LzFindOpt.asm -- ASM version of GetMatchesSpecN_2() function
-; 2021-07-21: Igor Pavlov : Public domain
+; 2024-06-18: Igor Pavlov : Public domain
 ;
 
 ifndef x64
@@ -11,10 +11,31 @@ include 7zAsm.asm
 
 MY_ASM_START
 
-_TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE'
+ifndef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
+if (IS_LINUX gt 0)
+  Z7_LZ_FIND_OPT_ASM_USE_SEGMENT equ 1
+else
+  Z7_LZ_FIND_OPT_ASM_USE_SEGMENT equ 1
+endif
+endif
 
+ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
+_TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE'
 MY_ALIGN macro num:req
         align num
+        ; align 16
+endm
+else
+MY_ALIGN macro num:req
+        ; We expect that ".text" is aligned for 16-bytes.
+        ; So we don't need large alignment inside our function.
+        align 16
+endm
+endif
+
+
+MY_ALIGN_16 macro
+        MY_ALIGN 16
 endm
 
 MY_ALIGN_32 macro
@@ -136,7 +157,11 @@ COPY_VAR_64 macro dest_var, src_var
 endm
 
 
+ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
 ; MY_ALIGN_64
+else
+  MY_ALIGN_16
+endif
 MY_PROC GetMatchesSpecN_2, 13
 MY_PUSH_PRESERVED_ABI_REGS
         mov     r0, RSP
@@ -508,6 +533,8 @@ fin:
 MY_POP_PRESERVED_ABI_REGS
 MY_ENDP
 
+ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT
 _TEXT$LZFINDOPT ENDS
+endif
 
 end
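The hunks above show the whole shape of the change: Z7_LZ_FIND_OPT_ASM_USE_SEGMENT defaults to defined, and MY_ALIGN either honors the requested alignment (inside the 64-byte-aligned segment) or caps it at 16 (inside the default code section, whose own alignment is only assumed to be 16 bytes). Below is a minimal standalone sketch of that pattern; the names USE_SEG, _TEXT$DEMO and demo_proc are hypothetical, and a MASM-compatible assembler (ml64/uasm) is assumed.

    ; Illustrative sketch only -- hypothetical names, not part of this commit.
    ifndef USE_SEG
      USE_SEG equ 1                 ; segment variant is the default
    endif

    ifdef USE_SEG
    _TEXT$DEMO SEGMENT ALIGN(64) 'CODE'
    MY_ALIGN macro num:req
            align num               ; any power of two up to 64 is meaningful here
    endm
    else
    .code                           ; default code section, assumed 16-byte aligned
    MY_ALIGN macro num:req
            align 16                ; larger pads would be relative to an unknown base
    endm
    endif

            MY_ALIGN 64             ; expands to align 64 (or align 16 in the else branch)
    demo_proc:
            ret

    ifdef USE_SEG
    _TEXT$DEMO ENDS
    endif
            end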
diff --git a/Asm/x86/LzmaDecOpt.asm b/Asm/x86/LzmaDecOpt.asm
index f2818e7..7c568df 100644
--- a/Asm/x86/LzmaDecOpt.asm
+++ b/Asm/x86/LzmaDecOpt.asm
@@ -1,5 +1,5 @@
 ; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function
-; 2021-02-23: Igor Pavlov : Public domain
+; 2024-06-18: Igor Pavlov : Public domain
 ;
 ; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()
 ; function for check at link time.
@@ -17,11 +17,41 @@ include 7zAsm.asm
 
 MY_ASM_START
 
-_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
+; If Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is defined, we use an additional SEGMENT with 64-byte alignment.
+; If it is not defined, we use the default SEGMENT (where 16-byte alignment of the segment is expected).
+; The performance is almost identical in our tests.
+; But performance can depend on the position of the lzmadec code in the instruction cache
+; or micro-op cache (that is, on the low address bits within 32-byte/64-byte cache lines).
+; 64-byte alignment provides a more consistent speed regardless
+; of the code's position in the executable.
+; But it's also possible that code without Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is
+; slightly faster than 64-byte aligned code in some cases, if the offset of the lzmadec
+; code within a 64-byte block after compilation happens to give better speed.
+; Note that Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT adds an extra section to the ELF file.
+; If you don't want that extra section, do not define Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT.
+
+ifndef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
+if (IS_LINUX gt 0)
+  Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1
+else
+  Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1
+endif
+endif
 
+ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
+_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
 MY_ALIGN macro num:req
         align num
+        ; align 16
 endm
+else
+MY_ALIGN macro num:req
+        ; We expect that ".text" is aligned for 16-bytes.
+        ; So we don't need large alignment inside our function.
+        align 16
+endm
+endif
+
 
 MY_ALIGN_16 macro
         MY_ALIGN 16
@@ -610,7 +640,11 @@ PARAM_lzma equ REG_ABI_PARAM_0
 PARAM_limit    equ REG_ABI_PARAM_1
 PARAM_bufLimit equ REG_ABI_PARAM_2
 
+ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
 ; MY_ALIGN_64
+else
+  MY_ALIGN_16
+endif
 MY_PROC LzmaDec_DecodeReal_3, 3
 MY_PUSH_PRESERVED_ABI_REGS
 
@@ -1298,6 +1332,8 @@ fin:
 MY_POP_PRESERVED_ABI_REGS
 MY_ENDP
 
+ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT
 _TEXT$LZMADECOPT ENDS
+endif
 
 end
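The gate placed before MY_PROC in the hunk above is the other half of the pattern: in the SEGMENT variant the procedure begins at the segment's 64-byte-aligned origin, so the explicit pad stays commented out, while the default-section variant requests a 16-byte pad explicitly. A hedged standalone sketch, again with hypothetical names (USE_SEG, _TEXT$DEC, decode_entry) and a MASM-compatible assembler assumed:

    ; Illustrative sketch only -- hypothetical names, not part of this commit.
    ifndef USE_SEG
      USE_SEG equ 1
    endif

    ifdef USE_SEG
    _TEXT$DEC SEGMENT ALIGN(64) 'CODE'
            ; an explicit align 64 would be redundant here:
            ; the segment origin is already 64-byte aligned
    else
    .code
            align 16                ; explicit 16-byte entry alignment
    endif

    decode_entry proc
            ret
    decode_entry endp

    ifdef USE_SEG
    _TEXT$DEC ENDS
    endif
            end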
diff --git a/Asm/x86/Sha1Opt.asm b/Asm/x86/Sha1Opt.asm
index 3495fd1..0b63aeb 100644
--- a/Asm/x86/Sha1Opt.asm
+++ b/Asm/x86/Sha1Opt.asm
@@ -1,5 +1,5 @@
 ; Sha1Opt.asm -- SHA-1 optimized code for SHA-1 x86 hardware instructions
-; 2021-03-10 : Igor Pavlov : Public domain
+; 2024-06-16 : Igor Pavlov : Public domain
 
 include 7zAsm.asm
 
@@ -20,7 +20,7 @@ MY_ASM_START
 
 
 
-CONST SEGMENT
+CONST SEGMENT READONLY
 
 align 16
 Reverse_Endian_Mask db 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0
diff --git a/Asm/x86/Sha256Opt.asm b/Asm/x86/Sha256Opt.asm
index 3e9f6ed..bc2f9da 100644
--- a/Asm/x86/Sha256Opt.asm
+++ b/Asm/x86/Sha256Opt.asm
@@ -1,5 +1,5 @@
 ; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions
-; 2022-04-17 : Igor Pavlov : Public domain
+; 2024-06-16 : Igor Pavlov : Public domain
 
 include 7zAsm.asm
 
@@ -20,7 +20,7 @@ endif
 EXTRN K_CONST:xmmword
 @
 
-CONST SEGMENT
+CONST SEGMENT READONLY
 
 align 16
 Reverse_Endian_Mask db 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12
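In both SHA files the only change besides the date stamp is CONST SEGMENT becoming CONST SEGMENT READONLY. In MASM-style assemblers the READONLY attribute lets the assembler flag direct writes into the segment as errors and marks the emitted section read-only in the object file, so the constants can be mapped as non-writable. A minimal hypothetical illustration (Demo_Mask, _TEXT$DEMO and load_mask are made-up names):

    ; Illustrative sketch only -- hypothetical names, not part of this commit.
    CONST SEGMENT READONLY
    align 16
    Demo_Mask db 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12
    CONST ENDS

    _TEXT$DEMO SEGMENT 'CODE'
    load_mask proc
            movdqa  xmm0, xmmword ptr [Demo_Mask]   ; reads are allowed
            ; mov byte ptr [Demo_Mask], 0           ; rejected: segment is READONLY
            ret
    load_mask endp
    _TEXT$DEMO ENDS
            end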