diff options
author | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2022-06-20 00:00:00 +0000 |
---|---|---|
committer | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2023-12-17 13:35:20 +0500 |
commit | a3e1d227377188734b82f023f96f8e25dc40f3e6 (patch) | |
tree | 23cad8d47eb23d26ea727b4f7f4a65124f724065 /Asm | |
parent | f19f813537c7aea1c20749c914e756b54a9c3cf5 (diff) | |
download | 7zip-22.00.tar.gz 7zip-22.00.tar.bz2 7zip-22.00.zip |
22.00 (tag: 22.00)
Diffstat (limited to 'Asm')
-rw-r--r-- | Asm/x86/7zAsm.asm | 7 | ||||
-rw-r--r-- | Asm/x86/Sha256Opt.asm | 32 |
2 files changed, 28 insertions, 11 deletions
diff --git a/Asm/x86/7zAsm.asm b/Asm/x86/7zAsm.asm index 6275bb7..19c40da 100644 --- a/Asm/x86/7zAsm.asm +++ b/Asm/x86/7zAsm.asm | |||
@@ -1,7 +1,12 @@ | |||
1 | ; 7zAsm.asm -- ASM macros | 1 | ; 7zAsm.asm -- ASM macros |
2 | ; 2021-12-25 : Igor Pavlov : Public domain | 2 | ; 2022-05-16 : Igor Pavlov : Public domain |
3 | 3 | ||
4 | 4 | ||
5 | ; UASM can require these changes | ||
6 | ; OPTION FRAMEPRESERVEFLAGS:ON | ||
7 | ; OPTION PROLOGUE:NONE | ||
8 | ; OPTION EPILOGUE:NONE | ||
9 | |||
5 | ifdef @wordsize | 10 | ifdef @wordsize |
6 | ; @wordsize is defined only in JWASM and ASMC and is not defined in MASM | 11 | ; @wordsize is defined only in JWASM and ASMC and is not defined in MASM |
7 | ; @wordsize eq 8 for 64-bit x64 | 12 | ; @wordsize eq 8 for 64-bit x64 |
diff --git a/Asm/x86/Sha256Opt.asm b/Asm/x86/Sha256Opt.asm index 5d02c90..3e9f6ed 100644 --- a/Asm/x86/Sha256Opt.asm +++ b/Asm/x86/Sha256Opt.asm | |||
@@ -1,5 +1,5 @@ | |||
1 | ; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions | 1 | ; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions |
2 | ; 2021-03-10 : Igor Pavlov : Public domain | 2 | ; 2022-04-17 : Igor Pavlov : Public domain |
3 | 3 | ||
4 | include 7zAsm.asm | 4 | include 7zAsm.asm |
5 | 5 | ||
@@ -54,14 +54,20 @@ ifndef x64 | |||
54 | .686 | 54 | .686 |
55 | .xmm | 55 | .xmm |
56 | endif | 56 | endif |
57 | 57 | ||
58 | ; jwasm-based assemblers for linux and linker from new versions of binutils | ||
59 | ; can generate incorrect code for load [ARRAY + offset] instructions. | ||
60 | ; 22.00: we load K_CONST offset to (rTable) register to avoid jwasm+binutils problem | ||
61 | rTable equ r0 | ||
62 | ; rTable equ K_CONST | ||
63 | |||
58 | ifdef x64 | 64 | ifdef x64 |
59 | rNum equ REG_ABI_PARAM_2 | 65 | rNum equ REG_ABI_PARAM_2 |
60 | if (IS_LINUX eq 0) | 66 | if (IS_LINUX eq 0) |
61 | LOCAL_SIZE equ (16 * 2) | 67 | LOCAL_SIZE equ (16 * 2) |
62 | endif | 68 | endif |
63 | else | 69 | else |
64 | rNum equ r0 | 70 | rNum equ r3 |
65 | LOCAL_SIZE equ (16 * 1) | 71 | LOCAL_SIZE equ (16 * 1) |
66 | endif | 72 | endif |
67 | 73 | ||
@@ -103,15 +109,18 @@ MY_PROLOG macro | |||
103 | movdqa [r4 + 16], xmm9 | 109 | movdqa [r4 + 16], xmm9 |
104 | endif | 110 | endif |
105 | else ; x86 | 111 | else ; x86 |
112 | push r3 | ||
113 | push r5 | ||
114 | mov r5, r4 | ||
115 | NUM_PUSH_REGS equ 2 | ||
116 | PARAM_OFFSET equ (REG_SIZE * (1 + NUM_PUSH_REGS)) | ||
106 | if (IS_CDECL gt 0) | 117 | if (IS_CDECL gt 0) |
107 | mov rState, [r4 + REG_SIZE * 1] | 118 | mov rState, [r4 + PARAM_OFFSET] |
108 | mov rData, [r4 + REG_SIZE * 2] | 119 | mov rData, [r4 + PARAM_OFFSET + REG_SIZE * 1] |
109 | mov rNum, [r4 + REG_SIZE * 3] | 120 | mov rNum, [r4 + PARAM_OFFSET + REG_SIZE * 2] |
110 | else ; fastcall | 121 | else ; fastcall |
111 | mov rNum, [r4 + REG_SIZE * 1] | 122 | mov rNum, [r4 + PARAM_OFFSET] |
112 | endif | 123 | endif |
113 | push r5 | ||
114 | mov r5, r4 | ||
115 | and r4, -16 | 124 | and r4, -16 |
116 | sub r4, LOCAL_SIZE | 125 | sub r4, LOCAL_SIZE |
117 | endif | 126 | endif |
@@ -129,6 +138,7 @@ MY_EPILOG macro | |||
129 | else ; x86 | 138 | else ; x86 |
130 | mov r4, r5 | 139 | mov r4, r5 |
131 | pop r5 | 140 | pop r5 |
141 | pop r3 | ||
132 | endif | 142 | endif |
133 | MY_ENDP | 143 | MY_ENDP |
134 | endm | 144 | endm |
@@ -171,7 +181,7 @@ pre2 equ 2 | |||
171 | 181 | ||
172 | 182 | ||
173 | RND4 macro k | 183 | RND4 macro k |
174 | movdqa msg, xmmword ptr [K_CONST + (k) * 16] | 184 | movdqa msg, xmmword ptr [rTable + (k) * 16] |
175 | paddd msg, @CatStr(xmm, %(w_regs + ((k + 0) mod 4))) | 185 | paddd msg, @CatStr(xmm, %(w_regs + ((k + 0) mod 4))) |
176 | MY_sha256rnds2 state0_N, state1_N | 186 | MY_sha256rnds2 state0_N, state1_N |
177 | pshufd msg, msg, 0eH | 187 | pshufd msg, msg, 0eH |
@@ -210,6 +220,8 @@ endm | |||
210 | MY_PROC Sha256_UpdateBlocks_HW, 3 | 220 | MY_PROC Sha256_UpdateBlocks_HW, 3 |
211 | MY_PROLOG | 221 | MY_PROLOG |
212 | 222 | ||
223 | lea rTable, [K_CONST] | ||
224 | |||
213 | cmp rNum, 0 | 225 | cmp rNum, 0 |
214 | je end_c | 226 | je end_c |
215 | 227 | ||