diff options
Diffstat (limited to 'Asm/x86')
| -rw-r--r-- | Asm/x86/7zAsm.asm | 7 | ||||
| -rw-r--r-- | Asm/x86/Sha256Opt.asm | 32 |
2 files changed, 28 insertions, 11 deletions
diff --git a/Asm/x86/7zAsm.asm b/Asm/x86/7zAsm.asm
index 6275bb7..19c40da 100644
--- a/Asm/x86/7zAsm.asm
+++ b/Asm/x86/7zAsm.asm
| |||
| @@ -1,7 +1,12 @@ | |||
| 1 | ; 7zAsm.asm -- ASM macros | 1 | ; 7zAsm.asm -- ASM macros |
| 2 | ; 2021-12-25 : Igor Pavlov : Public domain | 2 | ; 2022-05-16 : Igor Pavlov : Public domain |
| 3 | 3 | ||
| 4 | 4 | ||
| 5 | ; UASM can require these changes | ||
| 6 | ; OPTION FRAMEPRESERVEFLAGS:ON | ||
| 7 | ; OPTION PROLOGUE:NONE | ||
| 8 | ; OPTION EPILOGUE:NONE | ||
| 9 | |||
| 5 | ifdef @wordsize | 10 | ifdef @wordsize |
| 6 | ; @wordsize is defined only in JWASM and ASMC and is not defined in MASM | 11 | ; @wordsize is defined only in JWASM and ASMC and is not defined in MASM |
| 7 | ; @wordsize eq 8 for 64-bit x64 | 12 | ; @wordsize eq 8 for 64-bit x64 |
diff --git a/Asm/x86/Sha256Opt.asm b/Asm/x86/Sha256Opt.asm
index 5d02c90..3e9f6ed 100644
--- a/Asm/x86/Sha256Opt.asm
+++ b/Asm/x86/Sha256Opt.asm
| |||
| @@ -1,5 +1,5 @@ | |||
| 1 | ; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions | 1 | ; Sha256Opt.asm -- SHA-256 optimized code for SHA-256 x86 hardware instructions |
| 2 | ; 2021-03-10 : Igor Pavlov : Public domain | 2 | ; 2022-04-17 : Igor Pavlov : Public domain |
| 3 | 3 | ||
| 4 | include 7zAsm.asm | 4 | include 7zAsm.asm |
| 5 | 5 | ||
| @@ -54,14 +54,20 @@ ifndef x64 | |||
| 54 | .686 | 54 | .686 |
| 55 | .xmm | 55 | .xmm |
| 56 | endif | 56 | endif |
| 57 | 57 | ||
| 58 | ; JWasm-based assemblers for Linux, combined with the linker from newer versions of binutils, | ||
| 59 | ; can generate incorrect code for instructions that load from [ARRAY + offset]. | ||
| 60 | ; 22.00: we load the address of K_CONST into the (rTable) register to avoid the jwasm+binutils problem | ||
| 61 | rTable equ r0 | ||
| 62 | ; rTable equ K_CONST | ||
| 63 | |||
| 58 | ifdef x64 | 64 | ifdef x64 |
| 59 | rNum equ REG_ABI_PARAM_2 | 65 | rNum equ REG_ABI_PARAM_2 |
| 60 | if (IS_LINUX eq 0) | 66 | if (IS_LINUX eq 0) |
| 61 | LOCAL_SIZE equ (16 * 2) | 67 | LOCAL_SIZE equ (16 * 2) |
| 62 | endif | 68 | endif |
| 63 | else | 69 | else |
| 64 | rNum equ r0 | 70 | rNum equ r3 |
| 65 | LOCAL_SIZE equ (16 * 1) | 71 | LOCAL_SIZE equ (16 * 1) |
| 66 | endif | 72 | endif |
| 67 | 73 | ||
| @@ -103,15 +109,18 @@ MY_PROLOG macro | |||
| 103 | movdqa [r4 + 16], xmm9 | 109 | movdqa [r4 + 16], xmm9 |
| 104 | endif | 110 | endif |
| 105 | else ; x86 | 111 | else ; x86 |
| 112 | push r3 | ||
| 113 | push r5 | ||
| 114 | mov r5, r4 | ||
| 115 | NUM_PUSH_REGS equ 2 | ||
| 116 | PARAM_OFFSET equ (REG_SIZE * (1 + NUM_PUSH_REGS)) | ||
| 106 | if (IS_CDECL gt 0) | 117 | if (IS_CDECL gt 0) |
| 107 | mov rState, [r4 + REG_SIZE * 1] | 118 | mov rState, [r4 + PARAM_OFFSET] |
| 108 | mov rData, [r4 + REG_SIZE * 2] | 119 | mov rData, [r4 + PARAM_OFFSET + REG_SIZE * 1] |
| 109 | mov rNum, [r4 + REG_SIZE * 3] | 120 | mov rNum, [r4 + PARAM_OFFSET + REG_SIZE * 2] |
| 110 | else ; fastcall | 121 | else ; fastcall |
| 111 | mov rNum, [r4 + REG_SIZE * 1] | 122 | mov rNum, [r4 + PARAM_OFFSET] |
| 112 | endif | 123 | endif |
| 113 | push r5 | ||
| 114 | mov r5, r4 | ||
| 115 | and r4, -16 | 124 | and r4, -16 |
| 116 | sub r4, LOCAL_SIZE | 125 | sub r4, LOCAL_SIZE |
| 117 | endif | 126 | endif |
| @@ -129,6 +138,7 @@ MY_EPILOG macro | |||
| 129 | else ; x86 | 138 | else ; x86 |
| 130 | mov r4, r5 | 139 | mov r4, r5 |
| 131 | pop r5 | 140 | pop r5 |
| 141 | pop r3 | ||
| 132 | endif | 142 | endif |
| 133 | MY_ENDP | 143 | MY_ENDP |
| 134 | endm | 144 | endm |
| @@ -171,7 +181,7 @@ pre2 equ 2 | |||
| 171 | 181 | ||
| 172 | 182 | ||
| 173 | RND4 macro k | 183 | RND4 macro k |
| 174 | movdqa msg, xmmword ptr [K_CONST + (k) * 16] | 184 | movdqa msg, xmmword ptr [rTable + (k) * 16] |
| 175 | paddd msg, @CatStr(xmm, %(w_regs + ((k + 0) mod 4))) | 185 | paddd msg, @CatStr(xmm, %(w_regs + ((k + 0) mod 4))) |
| 176 | MY_sha256rnds2 state0_N, state1_N | 186 | MY_sha256rnds2 state0_N, state1_N |
| 177 | pshufd msg, msg, 0eH | 187 | pshufd msg, msg, 0eH |
| @@ -210,6 +220,8 @@ endm | |||
| 210 | MY_PROC Sha256_UpdateBlocks_HW, 3 | 220 | MY_PROC Sha256_UpdateBlocks_HW, 3 |
| 211 | MY_PROLOG | 221 | MY_PROLOG |
| 212 | 222 | ||
| 223 | lea rTable, [K_CONST] | ||
| 224 | |||
| 213 | cmp rNum, 0 | 225 | cmp rNum, 0 |
| 214 | je end_c | 226 | je end_c |
| 215 | 227 | ||
