aboutsummaryrefslogtreecommitdiff
path: root/Asm
diff options
context:
space:
mode:
Diffstat (limited to 'Asm')
-rw-r--r-- Asm/x86/7zAsm.asm 7
-rw-r--r-- Asm/x86/Sha256Opt.asm 32
2 files changed, 28 insertions, 11 deletions
diff --git a/Asm/x86/7zAsm.asm b/Asm/x86/7zAsm.asm
index 6275bb7..19c40da 100644
--- a/Asm/x86/7zAsm.asm
+++ b/Asm/x86/7zAsm.asm
@@ -1,7 +1,12 @@
1; 7zAsm.asm -- ASM macros 1; 7zAsm.asm -- ASM macros
2; 2021-12-25 : Igor Pavlov : Public domain 2; 2022-05-16 : Igor Pavlov : Public domain
3 3
4 4
5; UASM can require these changes
6; OPTION FRAMEPRESERVEFLAGS:ON
7; OPTION PROLOGUE:NONE
8; OPTION EPILOGUE:NONE
9
5ifdef @wordsize 10ifdef @wordsize
6; @wordsize is defined only in JWASM and ASMC and is not defined in MASM 11; @wordsize is defined only in JWASM and ASMC and is not defined in MASM
7; @wordsize eq 8 for 64-bit x64 12; @wordsize eq 8 for 64-bit x64
diff --git a/Asm/x86/Sha256Opt.asm b/Asm/x86/Sha256Opt.asm
index 5d02c90..3e9f6ed 100644
--- a/Asm/x86/Sha256Opt.asm
+++ b/Asm/x86/Sha256Opt.asm
@@ -1,5 +1,5 @@
1; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions 1; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions
2; 2021-03-10 : Igor Pavlov : Public domain 2; 2022-04-17 : Igor Pavlov : Public domain
3 3
4include 7zAsm.asm 4include 7zAsm.asm
5 5
@@ -54,14 +54,20 @@ ifndef x64
54 .686 54 .686
55 .xmm 55 .xmm
56endif 56endif
57 57
58; jwasm-based assemblers for linux and linker from new versions of binutils
59; can generate incorrect code for load [ARRAY + offset] instructions.
60; 22.00: we load K_CONST offset to (rTable) register to avoid jwasm+binutils problem
61 rTable equ r0
62 ; rTable equ K_CONST
63
58ifdef x64 64ifdef x64
59 rNum equ REG_ABI_PARAM_2 65 rNum equ REG_ABI_PARAM_2
60 if (IS_LINUX eq 0) 66 if (IS_LINUX eq 0)
61 LOCAL_SIZE equ (16 * 2) 67 LOCAL_SIZE equ (16 * 2)
62 endif 68 endif
63else 69else
64 rNum equ r0 70 rNum equ r3
65 LOCAL_SIZE equ (16 * 1) 71 LOCAL_SIZE equ (16 * 1)
66endif 72endif
67 73
@@ -103,15 +109,18 @@ MY_PROLOG macro
103 movdqa [r4 + 16], xmm9 109 movdqa [r4 + 16], xmm9
104 endif 110 endif
105 else ; x86 111 else ; x86
112 push r3
113 push r5
114 mov r5, r4
115 NUM_PUSH_REGS equ 2
116 PARAM_OFFSET equ (REG_SIZE * (1 + NUM_PUSH_REGS))
106 if (IS_CDECL gt 0) 117 if (IS_CDECL gt 0)
107 mov rState, [r4 + REG_SIZE * 1] 118 mov rState, [r4 + PARAM_OFFSET]
108 mov rData, [r4 + REG_SIZE * 2] 119 mov rData, [r4 + PARAM_OFFSET + REG_SIZE * 1]
109 mov rNum, [r4 + REG_SIZE * 3] 120 mov rNum, [r4 + PARAM_OFFSET + REG_SIZE * 2]
110 else ; fastcall 121 else ; fastcall
111 mov rNum, [r4 + REG_SIZE * 1] 122 mov rNum, [r4 + PARAM_OFFSET]
112 endif 123 endif
113 push r5
114 mov r5, r4
115 and r4, -16 124 and r4, -16
116 sub r4, LOCAL_SIZE 125 sub r4, LOCAL_SIZE
117 endif 126 endif
@@ -129,6 +138,7 @@ MY_EPILOG macro
129 else ; x86 138 else ; x86
130 mov r4, r5 139 mov r4, r5
131 pop r5 140 pop r5
141 pop r3
132 endif 142 endif
133 MY_ENDP 143 MY_ENDP
134endm 144endm
@@ -171,7 +181,7 @@ pre2 equ 2
171 181
172 182
173RND4 macro k 183RND4 macro k
174 movdqa msg, xmmword ptr [K_CONST + (k) * 16] 184 movdqa msg, xmmword ptr [rTable + (k) * 16]
175 paddd msg, @CatStr(xmm, %(w_regs + ((k + 0) mod 4))) 185 paddd msg, @CatStr(xmm, %(w_regs + ((k + 0) mod 4)))
176 MY_sha256rnds2 state0_N, state1_N 186 MY_sha256rnds2 state0_N, state1_N
177 pshufd msg, msg, 0eH 187 pshufd msg, msg, 0eH
@@ -210,6 +220,8 @@ endm
210MY_PROC Sha256_UpdateBlocks_HW, 3 220MY_PROC Sha256_UpdateBlocks_HW, 3
211 MY_PROLOG 221 MY_PROLOG
212 222
223 lea rTable, [K_CONST]
224
213 cmp rNum, 0 225 cmp rNum, 0
214 je end_c 226 je end_c
215 227