From a3e1d227377188734b82f023f96f8e25dc40f3e6 Mon Sep 17 00:00:00 2001 From: Igor Pavlov <87184205+ip7z@users.noreply.github.com> Date: Mon, 20 Jun 2022 00:00:00 +0000 Subject: 22.00 --- Asm/x86/7zAsm.asm | 7 ++++++- Asm/x86/Sha256Opt.asm | 32 ++++++++++++++++++++++---------- 2 files changed, 28 insertions(+), 11 deletions(-) (limited to 'Asm') diff --git a/Asm/x86/7zAsm.asm b/Asm/x86/7zAsm.asm index 6275bb7..19c40da 100644 --- a/Asm/x86/7zAsm.asm +++ b/Asm/x86/7zAsm.asm @@ -1,7 +1,12 @@ ; 7zAsm.asm -- ASM macros -; 2021-12-25 : Igor Pavlov : Public domain +; 2022-05-16 : Igor Pavlov : Public domain +; UASM can require these changes +; OPTION FRAMEPRESERVEFLAGS:ON +; OPTION PROLOGUE:NONE +; OPTION EPILOGUE:NONE + ifdef @wordsize ; @wordsize is defined only in JWASM and ASMC and is not defined in MASM ; @wordsize eq 8 for 64-bit x64 diff --git a/Asm/x86/Sha256Opt.asm b/Asm/x86/Sha256Opt.asm index 5d02c90..3e9f6ed 100644 --- a/Asm/x86/Sha256Opt.asm +++ b/Asm/x86/Sha256Opt.asm @@ -1,5 +1,5 @@ ; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions -; 2021-03-10 : Igor Pavlov : Public domain +; 2022-04-17 : Igor Pavlov : Public domain include 7zAsm.asm @@ -54,14 +54,20 @@ ifndef x64 .686 .xmm endif - + +; jwasm-based assemblers for linux and linker from new versions of binutils +; can generate incorrect code for load [ARRAY + offset] instructions. +; 22.00: we load K_CONST offset to (rTable) register to avoid jwasm+binutils problem + rTable equ r0 + ; rTable equ K_CONST + ifdef x64 rNum equ REG_ABI_PARAM_2 if (IS_LINUX eq 0) LOCAL_SIZE equ (16 * 2) endif else - rNum equ r0 + rNum equ r3 LOCAL_SIZE equ (16 * 1) endif @@ -103,15 +109,18 @@ MY_PROLOG macro movdqa [r4 + 16], xmm9 endif else ; x86 + push r3 + push r5 + mov r5, r4 + NUM_PUSH_REGS equ 2 + PARAM_OFFSET equ (REG_SIZE * (1 + NUM_PUSH_REGS)) if (IS_CDECL gt 0) - mov rState, [r4 + REG_SIZE * 1] - mov rData, [r4 + REG_SIZE * 2] - mov rNum, [r4 + REG_SIZE * 3] + mov rState, [r4 + PARAM_OFFSET] + mov rData, [r4 + PARAM_OFFSET + REG_SIZE * 1] + mov rNum, [r4 + PARAM_OFFSET + REG_SIZE * 2] else ; fastcall - mov rNum, [r4 + REG_SIZE * 1] + mov rNum, [r4 + PARAM_OFFSET] endif - push r5 - mov r5, r4 and r4, -16 sub r4, LOCAL_SIZE endif @@ -129,6 +138,7 @@ MY_EPILOG macro else ; x86 mov r4, r5 pop r5 + pop r3 endif MY_ENDP endm @@ -171,7 +181,7 @@ pre2 equ 2 RND4 macro k - movdqa msg, xmmword ptr [K_CONST + (k) * 16] + movdqa msg, xmmword ptr [rTable + (k) * 16] paddd msg, @CatStr(xmm, %(w_regs + ((k + 0) mod 4))) MY_sha256rnds2 state0_N, state1_N pshufd msg, msg, 0eH @@ -210,6 +220,8 @@ endm MY_PROC Sha256_UpdateBlocks_HW, 3 MY_PROLOG + lea rTable, [K_CONST] + cmp rNum, 0 je end_c -- cgit v1.2.3-55-g6feb