diff options
Diffstat (limited to '')
| -rw-r--r-- | Asm/x86/LzFindOpt.asm | 31 | ||||
| -rw-r--r-- | Asm/x86/LzmaDecOpt.asm | 40 | ||||
| -rw-r--r-- | Asm/x86/Sha1Opt.asm | 4 | ||||
| -rw-r--r-- | Asm/x86/Sha256Opt.asm | 4 |
4 files changed, 71 insertions, 8 deletions
diff --git a/Asm/x86/LzFindOpt.asm b/Asm/x86/LzFindOpt.asm index 42e10bd..94c5c76 100644 --- a/Asm/x86/LzFindOpt.asm +++ b/Asm/x86/LzFindOpt.asm | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | ; LzFindOpt.asm -- ASM version of GetMatchesSpecN_2() function | 1 | ; LzFindOpt.asm -- ASM version of GetMatchesSpecN_2() function |
| 2 | ; 2021-07-21: Igor Pavlov : Public domain | 2 | ; 2024-06-18: Igor Pavlov : Public domain |
| 3 | ; | 3 | ; |
| 4 | 4 | ||
| 5 | ifndef x64 | 5 | ifndef x64 |
| @@ -11,10 +11,31 @@ include 7zAsm.asm | |||
| 11 | 11 | ||
| 12 | MY_ASM_START | 12 | MY_ASM_START |
| 13 | 13 | ||
| 14 | _TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE' | 14 | ifndef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT |
| 15 | if (IS_LINUX gt 0) | ||
| 16 | Z7_LZ_FIND_OPT_ASM_USE_SEGMENT equ 1 | ||
| 17 | else | ||
| 18 | Z7_LZ_FIND_OPT_ASM_USE_SEGMENT equ 1 | ||
| 19 | endif | ||
| 20 | endif | ||
| 15 | 21 | ||
| 22 | ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT | ||
| 23 | _TEXT$LZFINDOPT SEGMENT ALIGN(64) 'CODE' | ||
| 16 | MY_ALIGN macro num:req | 24 | MY_ALIGN macro num:req |
| 17 | align num | 25 | align num |
| 26 | ; align 16 | ||
| 27 | endm | ||
| 28 | else | ||
| 29 | MY_ALIGN macro num:req | ||
| 30 | ; We expect that ".text" is aligned to 16 bytes. | ||
| 31 | ; So we don't need large alignment inside our function. | ||
| 32 | align 16 | ||
| 33 | endm | ||
| 34 | endif | ||
| 35 | |||
| 36 | |||
| 37 | MY_ALIGN_16 macro | ||
| 38 | MY_ALIGN 16 | ||
| 18 | endm | 39 | endm |
| 19 | 40 | ||
| 20 | MY_ALIGN_32 macro | 41 | MY_ALIGN_32 macro |
| @@ -136,7 +157,11 @@ COPY_VAR_64 macro dest_var, src_var | |||
| 136 | endm | 157 | endm |
| 137 | 158 | ||
| 138 | 159 | ||
| 160 | ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT | ||
| 139 | ; MY_ALIGN_64 | 161 | ; MY_ALIGN_64 |
| 162 | else | ||
| 163 | MY_ALIGN_16 | ||
| 164 | endif | ||
| 140 | MY_PROC GetMatchesSpecN_2, 13 | 165 | MY_PROC GetMatchesSpecN_2, 13 |
| 141 | MY_PUSH_PRESERVED_ABI_REGS | 166 | MY_PUSH_PRESERVED_ABI_REGS |
| 142 | mov r0, RSP | 167 | mov r0, RSP |
| @@ -508,6 +533,8 @@ fin: | |||
| 508 | MY_POP_PRESERVED_ABI_REGS | 533 | MY_POP_PRESERVED_ABI_REGS |
| 509 | MY_ENDP | 534 | MY_ENDP |
| 510 | 535 | ||
| 536 | ifdef Z7_LZ_FIND_OPT_ASM_USE_SEGMENT | ||
| 511 | _TEXT$LZFINDOPT ENDS | 537 | _TEXT$LZFINDOPT ENDS |
| 538 | endif | ||
| 512 | 539 | ||
| 513 | end | 540 | end |
diff --git a/Asm/x86/LzmaDecOpt.asm b/Asm/x86/LzmaDecOpt.asm index f2818e7..7c568df 100644 --- a/Asm/x86/LzmaDecOpt.asm +++ b/Asm/x86/LzmaDecOpt.asm | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | ; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function | 1 | ; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function |
| 2 | ; 2021-02-23: Igor Pavlov : Public domain | 2 | ; 2024-06-18: Igor Pavlov : Public domain |
| 3 | ; | 3 | ; |
| 4 | ; 3 - is the code compatibility version of LzmaDec_DecodeReal_*() | 4 | ; 3 - is the code compatibility version of LzmaDec_DecodeReal_*() |
| 5 | ; function for check at link time. | 5 | ; function for check at link time. |
| @@ -17,11 +17,41 @@ include 7zAsm.asm | |||
| 17 | 17 | ||
| 18 | MY_ASM_START | 18 | MY_ASM_START |
| 19 | 19 | ||
| 20 | _TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE' | 20 | ; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is defined, we use additional SEGMENT with 64-byte alignment. |
| 21 | ; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is not defined, we use default SEGMENT (where default 16-byte alignment of segment is expected). | ||
| 22 | ; The performance is almost identical in our tests. | ||
| 23 | ; But the performance can depend on the position of lzmadec code inside the instruction cache | ||
| 24 | ; or micro-op cache line (depending on the low address bits in 32-byte/64-byte cache lines). | ||
| 25 | ; And 64-byte alignment provides a more consistent speed regardless | ||
| 26 | ; of the code's position in the executable. | ||
| 27 | ; But it's also possible that code without Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT can be | ||
| 28 | ; slightly faster than 64-byte aligned code in some cases, if the offset of lzmadec | ||
| 29 | ; code in a 64-byte block after compilation provides better speed for some reason. | ||
| 30 | ; Note that Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT adds an extra section to the ELF file. | ||
| 31 | ; If you don't want to get that extra section, do not define Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT. | ||
| 32 | |||
| 33 | ifndef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT | ||
| 34 | if (IS_LINUX gt 0) | ||
| 35 | Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1 | ||
| 36 | else | ||
| 37 | Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1 | ||
| 38 | endif | ||
| 39 | endif | ||
| 21 | 40 | ||
| 41 | ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT | ||
| 42 | _TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE' | ||
| 22 | MY_ALIGN macro num:req | 43 | MY_ALIGN macro num:req |
| 23 | align num | 44 | align num |
| 45 | ; align 16 | ||
| 24 | endm | 46 | endm |
| 47 | else | ||
| 48 | MY_ALIGN macro num:req | ||
| 49 | ; We expect that ".text" is aligned to 16 bytes. | ||
| 50 | ; So we don't need large alignment inside our function. | ||
| 51 | align 16 | ||
| 52 | endm | ||
| 53 | endif | ||
| 54 | |||
| 25 | 55 | ||
| 26 | MY_ALIGN_16 macro | 56 | MY_ALIGN_16 macro |
| 27 | MY_ALIGN 16 | 57 | MY_ALIGN 16 |
| @@ -610,7 +640,11 @@ PARAM_lzma equ REG_ABI_PARAM_0 | |||
| 610 | PARAM_limit equ REG_ABI_PARAM_1 | 640 | PARAM_limit equ REG_ABI_PARAM_1 |
| 611 | PARAM_bufLimit equ REG_ABI_PARAM_2 | 641 | PARAM_bufLimit equ REG_ABI_PARAM_2 |
| 612 | 642 | ||
| 643 | ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT | ||
| 613 | ; MY_ALIGN_64 | 644 | ; MY_ALIGN_64 |
| 645 | else | ||
| 646 | MY_ALIGN_16 | ||
| 647 | endif | ||
| 614 | MY_PROC LzmaDec_DecodeReal_3, 3 | 648 | MY_PROC LzmaDec_DecodeReal_3, 3 |
| 615 | MY_PUSH_PRESERVED_ABI_REGS | 649 | MY_PUSH_PRESERVED_ABI_REGS |
| 616 | 650 | ||
| @@ -1298,6 +1332,8 @@ fin: | |||
| 1298 | MY_POP_PRESERVED_ABI_REGS | 1332 | MY_POP_PRESERVED_ABI_REGS |
| 1299 | MY_ENDP | 1333 | MY_ENDP |
| 1300 | 1334 | ||
| 1335 | ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT | ||
| 1301 | _TEXT$LZMADECOPT ENDS | 1336 | _TEXT$LZMADECOPT ENDS |
| 1337 | endif | ||
| 1302 | 1338 | ||
| 1303 | end | 1339 | end |
diff --git a/Asm/x86/Sha1Opt.asm b/Asm/x86/Sha1Opt.asm index 3495fd1..0b63aeb 100644 --- a/Asm/x86/Sha1Opt.asm +++ b/Asm/x86/Sha1Opt.asm | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | ; Sha1Opt.asm -- SHA-1 optimized code for SHA-1 x86 hardware instructions | 1 | ; Sha1Opt.asm -- SHA-1 optimized code for SHA-1 x86 hardware instructions |
| 2 | ; 2021-03-10 : Igor Pavlov : Public domain | 2 | ; 2024-06-16 : Igor Pavlov : Public domain |
| 3 | 3 | ||
| 4 | include 7zAsm.asm | 4 | include 7zAsm.asm |
| 5 | 5 | ||
| @@ -20,7 +20,7 @@ MY_ASM_START | |||
| 20 | 20 | ||
| 21 | 21 | ||
| 22 | 22 | ||
| 23 | CONST SEGMENT | 23 | CONST SEGMENT READONLY |
| 24 | 24 | ||
| 25 | align 16 | 25 | align 16 |
| 26 | Reverse_Endian_Mask db 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0 | 26 | Reverse_Endian_Mask db 15,14,13,12, 11,10,9,8, 7,6,5,4, 3,2,1,0 |
diff --git a/Asm/x86/Sha256Opt.asm b/Asm/x86/Sha256Opt.asm index 3e9f6ed..bc2f9da 100644 --- a/Asm/x86/Sha256Opt.asm +++ b/Asm/x86/Sha256Opt.asm | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | ; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions | 1 | ; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions |
| 2 | ; 2022-04-17 : Igor Pavlov : Public domain | 2 | ; 2024-06-16 : Igor Pavlov : Public domain |
| 3 | 3 | ||
| 4 | include 7zAsm.asm | 4 | include 7zAsm.asm |
| 5 | 5 | ||
| @@ -20,7 +20,7 @@ endif | |||
| 20 | EXTRN K_CONST:xmmword | 20 | EXTRN K_CONST:xmmword |
| 21 | @ | 21 | @ |
| 22 | 22 | ||
| 23 | CONST SEGMENT | 23 | CONST SEGMENT READONLY |
| 24 | 24 | ||
| 25 | align 16 | 25 | align 16 |
| 26 | Reverse_Endian_Mask db 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 | 26 | Reverse_Endian_Mask db 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 |
