diff options
author | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2023-06-21 00:00:00 +0000 |
---|---|---|
committer | Igor Pavlov <87184205+ip7z@users.noreply.github.com> | 2023-12-17 14:59:19 +0500 |
commit | 5b39dc76f1bc82f941d5c800ab9f34407a06b53a (patch) | |
tree | fe5e17420300b715021a76328444088d32047963 /C/Bra86.c | |
parent | 93be7d4abfd4233228f58ee1fbbcd76d91be66a4 (diff) | |
download | 7zip-5b39dc76f1bc82f941d5c800ab9f34407a06b53a.tar.gz 7zip-5b39dc76f1bc82f941d5c800ab9f34407a06b53a.tar.bz2 7zip-5b39dc76f1bc82f941d5c800ab9f34407a06b53a.zip |
23.01 23.01
Diffstat (limited to 'C/Bra86.c')
-rw-r--r-- | C/Bra86.c | 221 |
1 files changed, 163 insertions, 58 deletions
@@ -1,82 +1,187 @@ | |||
1 | /* Bra86.c -- Converter for x86 code (BCJ) | 1 | /* Bra86.c -- Branch converter for X86 code (BCJ) |
2 | 2021-02-09 : Igor Pavlov : Public domain */ | 2 | 2023-04-02 : Igor Pavlov : Public domain */ |
3 | 3 | ||
4 | #include "Precomp.h" | 4 | #include "Precomp.h" |
5 | 5 | ||
6 | #include "Bra.h" | 6 | #include "Bra.h" |
7 | #include "CpuArch.h" | ||
7 | 8 | ||
8 | #define Test86MSByte(b) ((((b) + 1) & 0xFE) == 0) | ||
9 | 9 | ||
10 | SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding) | 10 | #if defined(MY_CPU_SIZEOF_POINTER) \ |
11 | && ( MY_CPU_SIZEOF_POINTER == 4 \ | ||
12 | || MY_CPU_SIZEOF_POINTER == 8) | ||
13 | #define BR_CONV_USE_OPT_PC_PTR | ||
14 | #endif | ||
15 | |||
16 | #ifdef BR_CONV_USE_OPT_PC_PTR | ||
17 | #define BR_PC_INIT pc -= (UInt32)(SizeT)p; // (MY_uintptr_t) | ||
18 | #define BR_PC_GET (pc + (UInt32)(SizeT)p) | ||
19 | #else | ||
20 | #define BR_PC_INIT pc += (UInt32)size; | ||
21 | #define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p)) | ||
22 | // #define BR_PC_INIT | ||
23 | // #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data)) | ||
24 | #endif | ||
25 | |||
26 | #define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c; | ||
27 | // #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c; | ||
28 | |||
29 | #define Z7_BRANCH_CONV_ST(name) z7_BranchConvSt_ ## name | ||
30 | |||
31 | #define BR86_NEED_CONV_FOR_MS_BYTE(b) ((((b) + 1) & 0xfe) == 0) | ||
32 | |||
33 | #ifdef MY_CPU_LE_UNALIGN | ||
34 | #define BR86_PREPARE_BCJ_SCAN const UInt32 v = GetUi32(p) ^ 0xe8e8e8e8; | ||
35 | #define BR86_IS_BCJ_BYTE(n) ((v & ((UInt32)0xfe << (n) * 8)) == 0) | ||
36 | #else | ||
37 | #define BR86_PREPARE_BCJ_SCAN | ||
38 | // bad for MSVC X86 (partial write to byte reg): | ||
39 | #define BR86_IS_BCJ_BYTE(n) ((p[n - 4] & 0xfe) == 0xe8) | ||
40 | // bad for old MSVC (partial write to byte reg): | ||
41 | // #define BR86_IS_BCJ_BYTE(n) (((*p ^ 0xe8) & 0xfe) == 0) | ||
42 | #endif | ||
43 | |||
44 | static | ||
45 | Z7_FORCE_INLINE | ||
46 | Z7_ATTRIB_NO_VECTOR | ||
47 | Byte *Z7_BRANCH_CONV_ST(X86)(Byte *p, SizeT size, UInt32 pc, UInt32 *state, int encoding) | ||
11 | { | 48 | { |
12 | SizeT pos = 0; | ||
13 | UInt32 mask = *state & 7; | ||
14 | if (size < 5) | 49 | if (size < 5) |
15 | return 0; | 50 | return p; |
16 | size -= 4; | 51 | { |
17 | ip += 5; | 52 | // Byte *p = data; |
53 | const Byte *lim = p + size - 4; | ||
54 | unsigned mask = (unsigned)*state; // & 7; | ||
55 | #ifdef BR_CONV_USE_OPT_PC_PTR | ||
56 | /* if BR_CONV_USE_OPT_PC_PTR is defined: we need to adjust (pc) for (+4), | ||
57 | because call/jump offset is relative to the next instruction. | ||
58 | if BR_CONV_USE_OPT_PC_PTR is not defined : we don't need to adjust (pc) for (+4), | ||
59 | because BR_PC_GET uses (pc - (lim - p)), and lim was adjusted for (-4) before. | ||
60 | */ | ||
61 | pc += 4; | ||
62 | #endif | ||
63 | BR_PC_INIT | ||
64 | goto start; | ||
18 | 65 | ||
19 | for (;;) | 66 | for (;; mask |= 4) |
20 | { | 67 | { |
21 | Byte *p = data + pos; | 68 | // cont: mask |= 4; |
22 | const Byte *limit = data + size; | 69 | start: |
23 | for (; p < limit; p++) | 70 | if (p >= lim) |
24 | if ((*p & 0xFE) == 0xE8) | 71 | goto fin; |
25 | break; | ||
26 | |||
27 | { | 72 | { |
28 | SizeT d = (SizeT)(p - data) - pos; | 73 | BR86_PREPARE_BCJ_SCAN |
29 | pos = (SizeT)(p - data); | 74 | p += 4; |
30 | if (p >= limit) | 75 | if (BR86_IS_BCJ_BYTE(0)) { goto m0; } mask >>= 1; |
31 | { | 76 | if (BR86_IS_BCJ_BYTE(1)) { goto m1; } mask >>= 1; |
32 | *state = (d > 2 ? 0 : mask >> (unsigned)d); | 77 | if (BR86_IS_BCJ_BYTE(2)) { goto m2; } mask = 0; |
33 | return pos; | 78 | if (BR86_IS_BCJ_BYTE(3)) { goto a3; } |
34 | } | ||
35 | if (d > 2) | ||
36 | mask = 0; | ||
37 | else | ||
38 | { | ||
39 | mask >>= (unsigned)d; | ||
40 | if (mask != 0 && (mask > 4 || mask == 3 || Test86MSByte(p[(size_t)(mask >> 1) + 1]))) | ||
41 | { | ||
42 | mask = (mask >> 1) | 4; | ||
43 | pos++; | ||
44 | continue; | ||
45 | } | ||
46 | } | ||
47 | } | 79 | } |
80 | goto main_loop; | ||
48 | 81 | ||
49 | if (Test86MSByte(p[4])) | 82 | m0: p--; |
83 | m1: p--; | ||
84 | m2: p--; | ||
85 | if (mask == 0) | ||
86 | goto a3; | ||
87 | if (p > lim) | ||
88 | goto fin_p; | ||
89 | |||
90 | // if (((0x17u >> mask) & 1) == 0) | ||
91 | if (mask > 4 || mask == 3) | ||
92 | { | ||
93 | mask >>= 1; | ||
94 | continue; // goto cont; | ||
95 | } | ||
96 | mask >>= 1; | ||
97 | if (BR86_NEED_CONV_FOR_MS_BYTE(p[mask])) | ||
98 | continue; // goto cont; | ||
99 | // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont; | ||
50 | { | 100 | { |
51 | UInt32 v = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]); | 101 | UInt32 v = GetUi32(p); |
52 | UInt32 cur = ip + (UInt32)pos; | 102 | UInt32 c; |
53 | pos += 5; | 103 | v += (1 << 24); if (v & 0xfe000000) continue; // goto cont; |
54 | if (encoding) | 104 | c = BR_PC_GET; |
55 | v += cur; | 105 | BR_CONVERT_VAL(v, c) |
56 | else | ||
57 | v -= cur; | ||
58 | if (mask != 0) | ||
59 | { | 106 | { |
60 | unsigned sh = (mask & 6) << 2; | 107 | mask <<= 3; |
61 | if (Test86MSByte((Byte)(v >> sh))) | 108 | if (BR86_NEED_CONV_FOR_MS_BYTE(v >> mask)) |
62 | { | 109 | { |
63 | v ^= (((UInt32)0x100 << sh) - 1); | 110 | v ^= (((UInt32)0x100 << mask) - 1); |
64 | if (encoding) | 111 | #ifdef MY_CPU_X86 |
65 | v += cur; | 112 | // for X86 : we can recalculate (c) to reduce register pressure |
66 | else | 113 | c = BR_PC_GET; |
67 | v -= cur; | 114 | #endif |
115 | BR_CONVERT_VAL(v, c) | ||
68 | } | 116 | } |
69 | mask = 0; | 117 | mask = 0; |
70 | } | 118 | } |
71 | p[1] = (Byte)v; | 119 | // v = (v & ((1 << 24) - 1)) - (v & (1 << 24)); |
72 | p[2] = (Byte)(v >> 8); | 120 | v &= (1 << 25) - 1; v -= (1 << 24); |
73 | p[3] = (Byte)(v >> 16); | 121 | SetUi32(p, v) |
74 | p[4] = (Byte)(0 - ((v >> 24) & 1)); | 122 | p += 4; |
123 | goto main_loop; | ||
75 | } | 124 | } |
76 | else | 125 | |
126 | main_loop: | ||
127 | if (p >= lim) | ||
128 | goto fin; | ||
129 | for (;;) | ||
77 | { | 130 | { |
78 | mask = (mask >> 1) | 4; | 131 | BR86_PREPARE_BCJ_SCAN |
79 | pos++; | 132 | p += 4; |
133 | if (BR86_IS_BCJ_BYTE(0)) { goto a0; } | ||
134 | if (BR86_IS_BCJ_BYTE(1)) { goto a1; } | ||
135 | if (BR86_IS_BCJ_BYTE(2)) { goto a2; } | ||
136 | if (BR86_IS_BCJ_BYTE(3)) { goto a3; } | ||
137 | if (p >= lim) | ||
138 | goto fin; | ||
139 | } | ||
140 | |||
141 | a0: p--; | ||
142 | a1: p--; | ||
143 | a2: p--; | ||
144 | a3: | ||
145 | if (p > lim) | ||
146 | goto fin_p; | ||
147 | // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont; | ||
148 | { | ||
149 | UInt32 v = GetUi32(p); | ||
150 | UInt32 c; | ||
151 | v += (1 << 24); if (v & 0xfe000000) continue; // goto cont; | ||
152 | c = BR_PC_GET; | ||
153 | BR_CONVERT_VAL(v, c) | ||
154 | // v = (v & ((1 << 24) - 1)) - (v & (1 << 24)); | ||
155 | v &= (1 << 25) - 1; v -= (1 << 24); | ||
156 | SetUi32(p, v) | ||
157 | p += 4; | ||
158 | goto main_loop; | ||
80 | } | 159 | } |
81 | } | 160 | } |
161 | |||
162 | fin_p: | ||
163 | p--; | ||
164 | fin: | ||
165 | // the following processing for tail is optional and can be commented | ||
166 | /* | ||
167 | lim += 4; | ||
168 | for (; p < lim; p++, mask >>= 1) | ||
169 | if ((*p & 0xfe) == 0xe8) | ||
170 | break; | ||
171 | */ | ||
172 | *state = (UInt32)mask; | ||
173 | return p; | ||
174 | } | ||
82 | } | 175 | } |
176 | |||
177 | |||
178 | #define Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding) \ | ||
179 | Z7_NO_INLINE \ | ||
180 | Z7_ATTRIB_NO_VECTOR \ | ||
181 | Byte *m(name)(Byte *data, SizeT size, UInt32 pc, UInt32 *state) \ | ||
182 | { return Z7_BRANCH_CONV_ST(name)(data, size, pc, state, encoding); } | ||
183 | |||
184 | Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_DEC, 0) | ||
185 | #ifndef Z7_EXTRACT_ONLY | ||
186 | Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_ENC, 1) | ||
187 | #endif | ||