aboutsummaryrefslogtreecommitdiff
path: root/C/Bra.c
diff options
context:
space:
mode:
authorIgor Pavlov <87184205+ip7z@users.noreply.github.com>2023-06-21 00:00:00 +0000
committerIgor Pavlov <87184205+ip7z@users.noreply.github.com>2023-12-17 14:59:19 +0500
commit5b39dc76f1bc82f941d5c800ab9f34407a06b53a (patch)
treefe5e17420300b715021a76328444088d32047963 /C/Bra.c
parent93be7d4abfd4233228f58ee1fbbcd76d91be66a4 (diff)
download7zip-5b39dc76f1bc82f941d5c800ab9f34407a06b53a.tar.gz
7zip-5b39dc76f1bc82f941d5c800ab9f34407a06b53a.tar.bz2
7zip-5b39dc76f1bc82f941d5c800ab9f34407a06b53a.zip
23.0123.01
Diffstat (limited to 'C/Bra.c')
-rw-r--r--C/Bra.c496
1 files changed, 343 insertions, 153 deletions
diff --git a/C/Bra.c b/C/Bra.c
index 3b854d9..22e0e47 100644
--- a/C/Bra.c
+++ b/C/Bra.c
@@ -1,230 +1,420 @@
1/* Bra.c -- Converters for RISC code 1/* Bra.c -- Branch converters for RISC code
22021-02-09 : Igor Pavlov : Public domain */ 22023-04-02 : Igor Pavlov : Public domain */
3 3
4#include "Precomp.h" 4#include "Precomp.h"
5 5
6#include "CpuArch.h"
7#include "Bra.h" 6#include "Bra.h"
7#include "CpuArch.h"
8#include "RotateDefs.h"
9
10#if defined(MY_CPU_SIZEOF_POINTER) \
11 && ( MY_CPU_SIZEOF_POINTER == 4 \
12 || MY_CPU_SIZEOF_POINTER == 8)
13 #define BR_CONV_USE_OPT_PC_PTR
14#endif
15
16#ifdef BR_CONV_USE_OPT_PC_PTR
17#define BR_PC_INIT pc -= (UInt32)(SizeT)p;
18#define BR_PC_GET (pc + (UInt32)(SizeT)p)
19#else
20#define BR_PC_INIT pc += (UInt32)size;
21#define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p))
22// #define BR_PC_INIT
23// #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data))
24#endif
25
26#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
27// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;
28
29#define Z7_BRANCH_CONV(name) z7_BranchConv_ ## name
30
31#define Z7_BRANCH_FUNC_MAIN(name) \
32static \
33Z7_FORCE_INLINE \
34Z7_ATTRIB_NO_VECTOR \
35Byte *Z7_BRANCH_CONV(name)(Byte *p, SizeT size, UInt32 pc, int encoding)
8 36
9SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) 37#define Z7_BRANCH_FUNC_IMP(name, m, encoding) \
38Z7_NO_INLINE \
39Z7_ATTRIB_NO_VECTOR \
40Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \
41 { return Z7_BRANCH_CONV(name)(data, size, pc, encoding); } \
42
43#ifdef Z7_EXTRACT_ONLY
44#define Z7_BRANCH_FUNCS_IMP(name) \
45 Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0)
46#else
47#define Z7_BRANCH_FUNCS_IMP(name) \
48 Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0) \
49 Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC, 1)
50#endif
51
52#if defined(__clang__)
53#define BR_EXTERNAL_FOR
54#define BR_NEXT_ITERATION continue;
55#else
56#define BR_EXTERNAL_FOR for (;;)
57#define BR_NEXT_ITERATION break;
58#endif
59
60#if defined(__clang__) && (__clang_major__ >= 8) \
61 || defined(__GNUC__) && (__GNUC__ >= 1000) \
62 // GCC is not good for __builtin_expect() here
63 /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
64 // #define Z7_unlikely [[unlikely]]
65 // #define Z7_LIKELY(x) (__builtin_expect((x), 1))
66 #define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
67 // #define Z7_likely [[likely]]
68#else
69 // #define Z7_LIKELY(x) (x)
70 #define Z7_UNLIKELY(x) (x)
71 // #define Z7_likely
72#endif
73
74
75Z7_BRANCH_FUNC_MAIN(ARM64)
10{ 76{
11 Byte *p; 77 // Byte *p = data;
12 const Byte *lim; 78 const Byte *lim;
13 size &= ~(size_t)3; 79 const UInt32 flag = (UInt32)1 << (24 - 4);
14 ip += 4; 80 const UInt32 mask = ((UInt32)1 << 24) - (flag << 1);
15 p = data; 81 size &= ~(SizeT)3;
16 lim = data + size; 82 // if (size == 0) return p;
83 lim = p + size;
84 BR_PC_INIT
85 pc -= 4; // because (p) will point to next instruction
86
87 BR_EXTERNAL_FOR
88 {
89 // Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
90 for (;;)
91 {
92 UInt32 v;
93 if Z7_UNLIKELY(p == lim)
94 return p;
95 v = GetUi32a(p);
96 p += 4;
97 if Z7_UNLIKELY(((v - 0x94000000) & 0xfc000000) == 0)
98 {
99 UInt32 c = BR_PC_GET >> 2;
100 BR_CONVERT_VAL(v, c)
101 v &= 0x03ffffff;
102 v |= 0x94000000;
103 SetUi32a(p - 4, v)
104 BR_NEXT_ITERATION
105 }
106 // v = rotlFixed(v, 8); v += (flag << 8) - 0x90; if Z7_UNLIKELY((v & ((mask << 8) + 0x9f)) == 0)
107 v -= 0x90000000; if Z7_UNLIKELY((v & 0x9f000000) == 0)
108 {
109 UInt32 z, c;
110 // v = rotrFixed(v, 8);
111 v += flag; if Z7_UNLIKELY(v & mask) continue;
112 z = (v & 0xffffffe0) | (v >> 26);
113 c = (BR_PC_GET >> (12 - 3)) & ~(UInt32)7;
114 BR_CONVERT_VAL(z, c)
115 v &= 0x1f;
116 v |= 0x90000000;
117 v |= z << 26;
118 v |= 0x00ffffe0 & ((z & (((flag << 1) - 1))) - flag);
119 SetUi32a(p - 4, v)
120 }
121 }
122 }
123}
124Z7_BRANCH_FUNCS_IMP(ARM64)
17 125
18 if (encoding)
19 126
127Z7_BRANCH_FUNC_MAIN(ARM)
128{
129 // Byte *p = data;
130 const Byte *lim;
131 size &= ~(SizeT)3;
132 lim = p + size;
133 BR_PC_INIT
134 /* in ARM: branch offset is relative to the +2 instructions from current instruction.
135 (p) will point to next instruction */
136 pc += 8 - 4;
137
20 for (;;) 138 for (;;)
21 { 139 {
22 for (;;) 140 for (;;)
23 { 141 {
24 if (p >= lim) 142 if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break;
25 return (SizeT)(p - data); 143 if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break;
26 p += 4;
27 if (p[-1] == 0xEB)
28 break;
29 } 144 }
30 { 145 {
31 UInt32 v = GetUi32(p - 4); 146 UInt32 v = GetUi32a(p - 4);
32 v <<= 2; 147 UInt32 c = BR_PC_GET >> 2;
33 v += ip + (UInt32)(p - data); 148 BR_CONVERT_VAL(v, c)
34 v >>= 2; 149 v &= 0x00ffffff;
35 v &= 0x00FFFFFF; 150 v |= 0xeb000000;
36 v |= 0xEB000000; 151 SetUi32a(p - 4, v)
37 SetUi32(p - 4, v);
38 } 152 }
39 } 153 }
154}
155Z7_BRANCH_FUNCS_IMP(ARM)
156
40 157
158Z7_BRANCH_FUNC_MAIN(PPC)
159{
160 // Byte *p = data;
161 const Byte *lim;
162 size &= ~(SizeT)3;
163 lim = p + size;
164 BR_PC_INIT
165 pc -= 4; // because (p) will point to next instruction
166
41 for (;;) 167 for (;;)
42 { 168 {
169 UInt32 v;
43 for (;;) 170 for (;;)
44 { 171 {
45 if (p >= lim) 172 if Z7_UNLIKELY(p == lim)
46 return (SizeT)(p - data); 173 return p;
174 // v = GetBe32a(p);
175 v = *(UInt32 *)(void *)p;
47 p += 4; 176 p += 4;
48 if (p[-1] == 0xEB) 177 // if ((v & 0xfc000003) == 0x48000001) break;
49 break; 178 // if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) break;
179 if Z7_UNLIKELY(
180 ((v - Z7_CONV_BE_TO_NATIVE_CONST32(0x48000001))
181 & Z7_CONV_BE_TO_NATIVE_CONST32(0xfc000003)) == 0) break;
50 } 182 }
51 { 183 {
52 UInt32 v = GetUi32(p - 4); 184 v = Z7_CONV_NATIVE_TO_BE_32(v);
53 v <<= 2; 185 {
54 v -= ip + (UInt32)(p - data); 186 UInt32 c = BR_PC_GET;
55 v >>= 2; 187 BR_CONVERT_VAL(v, c)
56 v &= 0x00FFFFFF; 188 }
57 v |= 0xEB000000; 189 v &= 0x03ffffff;
58 SetUi32(p - 4, v); 190 v |= 0x48000000;
191 SetBe32a(p - 4, v)
59 } 192 }
60 } 193 }
61} 194}
195Z7_BRANCH_FUNCS_IMP(PPC)
62 196
63 197
64SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) 198#ifdef Z7_CPU_FAST_ROTATE_SUPPORTED
199#define BR_SPARC_USE_ROTATE
200#endif
201
202Z7_BRANCH_FUNC_MAIN(SPARC)
65{ 203{
66 Byte *p; 204 // Byte *p = data;
67 const Byte *lim; 205 const Byte *lim;
68 size &= ~(size_t)1; 206 const UInt32 flag = (UInt32)1 << 22;
69 p = data; 207 size &= ~(SizeT)3;
70 lim = data + size - 4; 208 lim = p + size;
71 209 BR_PC_INIT
72 if (encoding) 210 pc -= 4; // because (p) will point to next instruction
73
74 for (;;) 211 for (;;)
75 { 212 {
76 UInt32 b1; 213 UInt32 v;
77 for (;;) 214 for (;;)
78 { 215 {
79 UInt32 b3; 216 if Z7_UNLIKELY(p == lim)
80 if (p > lim) 217 return p;
81 return (SizeT)(p - data); 218 /* // the code without GetBe32a():
82 b1 = p[1]; 219 { const UInt32 v = GetUi16a(p) & 0xc0ff; p += 4; if (v == 0x40 || v == 0xc07f) break; }
83 b3 = p[3]; 220 */
84 p += 2; 221 v = GetBe32a(p);
85 b1 ^= 8; 222 p += 4;
86 if ((b3 & b1) >= 0xF8) 223 #ifdef BR_SPARC_USE_ROTATE
224 v = rotlFixed(v, 2);
225 v += (flag << 2) - 1;
226 if Z7_UNLIKELY((v & (3 - (flag << 3))) == 0)
227 #else
228 v += (UInt32)5 << 29;
229 v ^= (UInt32)7 << 29;
230 v += flag;
231 if Z7_UNLIKELY((v & (0 - (flag << 1))) == 0)
232 #endif
87 break; 233 break;
88 } 234 }
89 { 235 {
90 UInt32 v = 236 // UInt32 v = GetBe32a(p - 4);
91 ((UInt32)b1 << 19) 237 #ifndef BR_SPARC_USE_ROTATE
92 + (((UInt32)p[1] & 0x7) << 8) 238 v <<= 2;
93 + (((UInt32)p[-2] << 11)) 239 #endif
94 + (p[0]);
95
96 p += 2;
97 { 240 {
98 UInt32 cur = (ip + (UInt32)(p - data)) >> 1; 241 UInt32 c = BR_PC_GET;
99 v += cur; 242 BR_CONVERT_VAL(v, c)
100 } 243 }
101 244 v &= (flag << 3) - 1;
102 p[-4] = (Byte)(v >> 11); 245 #ifdef BR_SPARC_USE_ROTATE
103 p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7)); 246 v -= (flag << 2) - 1;
104 p[-2] = (Byte)v; 247 v = rotrFixed(v, 2);
105 p[-1] = (Byte)(0xF8 | (v >> 8)); 248 #else
249 v -= (flag << 2);
250 v >>= 2;
251 v |= (UInt32)1 << 30;
252 #endif
253 SetBe32a(p - 4, v)
106 } 254 }
107 } 255 }
256}
257Z7_BRANCH_FUNCS_IMP(SPARC)
258
259
260Z7_BRANCH_FUNC_MAIN(ARMT)
261{
262 // Byte *p = data;
263 Byte *lim;
264 size &= ~(SizeT)1;
265 // if (size == 0) return p;
266 if (size <= 2) return p;
267 size -= 2;
268 lim = p + size;
269 BR_PC_INIT
270 /* in ARM: branch offset is relative to the +2 instructions from current instruction.
271 (p) will point to the +2 instructions from current instruction */
272 // pc += 4 - 4;
273 // if (encoding) pc -= 0xf800 << 1; else pc += 0xf800 << 1;
274 // #define ARMT_TAIL_PROC { goto armt_tail; }
275 #define ARMT_TAIL_PROC { return p; }
108 276
109 for (;;) 277 do
110 { 278 {
111 UInt32 b1; 279 /* in MSVC 32-bit x86 compilers:
280 UInt32 version : it loads value from memory with movzx
281 Byte version : it loads value to 8-bit register (AL/CL)
282 movzx version is slightly faster in some cpus
283 */
284 unsigned b1;
285 // Byte / unsigned
286 b1 = p[1];
287 // optimized version to reduce one (p >= lim) check:
288 // unsigned a1 = p[1]; b1 = p[3]; p += 2; if Z7_LIKELY((b1 & (a1 ^ 8)) < 0xf8)
112 for (;;) 289 for (;;)
113 { 290 {
114 UInt32 b3; 291 unsigned b3; // Byte / UInt32
115 if (p > lim) 292 /* (Byte)(b3) normalization can use low byte computations in MSVC.
116 return (SizeT)(p - data); 293 It gives smaller code, and no loss of speed in some compilers/cpus.
117 b1 = p[1]; 294 But new MSVC 32-bit x86 compilers use more slow load
118 b3 = p[3]; 295 from memory to low byte register in that case.
119 p += 2; 296 So we try to use full 32-bit computations for faster code.
120 b1 ^= 8; 297 */
121 if ((b3 & b1) >= 0xF8) 298 // if (p >= lim) { ARMT_TAIL_PROC } b3 = b1 + 8; b1 = p[3]; p += 2; if ((b3 & b1) >= 0xf8) break;
122 break; 299 if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b3 = p[3]; p += 2; if Z7_UNLIKELY((b3 & (b1 ^ 8)) >= 0xf8) break;
300 if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b1 = p[3]; p += 2; if Z7_UNLIKELY((b1 & (b3 ^ 8)) >= 0xf8) break;
123 } 301 }
124 { 302 {
303 /* we can adjust pc for (0xf800) to rid of (& 0x7FF) operation.
304 But gcc/clang for arm64 can use bfi instruction for full code here */
125 UInt32 v = 305 UInt32 v =
126 ((UInt32)b1 << 19) 306 ((UInt32)GetUi16a(p - 2) << 11) |
307 ((UInt32)GetUi16a(p) & 0x7FF);
308 /*
309 UInt32 v =
310 ((UInt32)p[1 - 2] << 19)
127 + (((UInt32)p[1] & 0x7) << 8) 311 + (((UInt32)p[1] & 0x7) << 8)
128 + (((UInt32)p[-2] << 11)) 312 + (((UInt32)p[-2] << 11))
129 + (p[0]); 313 + (p[0]);
130 314 */
131 p += 2; 315 p += 2;
132 { 316 {
133 UInt32 cur = (ip + (UInt32)(p - data)) >> 1; 317 UInt32 c = BR_PC_GET >> 1;
134 v -= cur; 318 BR_CONVERT_VAL(v, c)
135 } 319 }
136 320 SetUi16a(p - 4, (UInt16)(((v >> 11) & 0x7ff) | 0xf000))
321 SetUi16a(p - 2, (UInt16)(v | 0xf800))
137 /* 322 /*
138 SetUi16(p - 4, (UInt16)(((v >> 11) & 0x7FF) | 0xF000));
139 SetUi16(p - 2, (UInt16)(v | 0xF800));
140 */
141
142 p[-4] = (Byte)(v >> 11); 323 p[-4] = (Byte)(v >> 11);
143 p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7)); 324 p[-3] = (Byte)(0xf0 | ((v >> 19) & 0x7));
144 p[-2] = (Byte)v; 325 p[-2] = (Byte)v;
145 p[-1] = (Byte)(0xF8 | (v >> 8)); 326 p[-1] = (Byte)(0xf8 | (v >> 8));
327 */
146 } 328 }
147 } 329 }
330 while (p < lim);
331 return p;
332 // armt_tail:
333 // if ((Byte)((lim[1] & 0xf8)) != 0xf0) { lim += 2; } return lim;
334 // return (Byte *)(lim + ((Byte)((lim[1] ^ 0xf0) & 0xf8) == 0 ? 0 : 2));
335 // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2));
336 // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2));
148} 337}
338Z7_BRANCH_FUNCS_IMP(ARMT)
149 339
150 340
151SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) 341// #define BR_IA64_NO_INLINE
152{
153 Byte *p;
154 const Byte *lim;
155 size &= ~(size_t)3;
156 ip -= 4;
157 p = data;
158 lim = data + size;
159
160 for (;;)
161 {
162 for (;;)
163 {
164 if (p >= lim)
165 return (SizeT)(p - data);
166 p += 4;
167 /* if ((v & 0xFC000003) == 0x48000001) */
168 if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1)
169 break;
170 }
171 {
172 UInt32 v = GetBe32(p - 4);
173 if (encoding)
174 v += ip + (UInt32)(p - data);
175 else
176 v -= ip + (UInt32)(p - data);
177 v &= 0x03FFFFFF;
178 v |= 0x48000000;
179 SetBe32(p - 4, v);
180 }
181 }
182}
183
184 342
185SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) 343Z7_BRANCH_FUNC_MAIN(IA64)
186{ 344{
187 Byte *p; 345 // Byte *p = data;
188 const Byte *lim; 346 const Byte *lim;
189 size &= ~(size_t)3; 347 size &= ~(SizeT)15;
190 ip -= 4; 348 lim = p + size;
191 p = data; 349 pc -= 1 << 4;
192 lim = data + size; 350 pc >>= 4 - 1;
193 351 // pc -= 1 << 1;
352
194 for (;;) 353 for (;;)
195 { 354 {
355 unsigned m;
196 for (;;) 356 for (;;)
197 { 357 {
198 if (p >= lim) 358 if Z7_UNLIKELY(p == lim)
199 return (SizeT)(p - data); 359 return p;
200 /* 360 m = (unsigned)((UInt32)0x334b0000 >> (*p & 0x1e));
201 v = GetBe32(p); 361 p += 16;
202 p += 4; 362 pc += 1 << 1;
203 m = v + ((UInt32)5 << 29); 363 if (m &= 3)
204 m ^= (UInt32)7 << 29;
205 m += (UInt32)1 << 22;
206 if ((m & ((UInt32)0x1FF << 23)) == 0)
207 break;
208 */
209 p += 4;
210 if ((p[-4] == 0x40 && (p[-3] & 0xC0) == 0) ||
211 (p[-4] == 0x7F && (p[-3] >= 0xC0)))
212 break; 364 break;
213 } 365 }
214 { 366 {
215 UInt32 v = GetBe32(p - 4); 367 p += (ptrdiff_t)m * 5 - 20; // negative value is expected here.
216 v <<= 2; 368 do
217 if (encoding) 369 {
218 v += ip + (UInt32)(p - data); 370 const UInt32 t =
219 else 371 #if defined(MY_CPU_X86_OR_AMD64)
220 v -= ip + (UInt32)(p - data); 372 // we use 32-bit load here to reduce code size on x86:
221 373 GetUi32(p);
222 v &= 0x01FFFFFF; 374 #else
223 v -= (UInt32)1 << 24; 375 GetUi16(p);
224 v ^= 0xFF000000; 376 #endif
225 v >>= 2; 377 UInt32 z = GetUi32(p + 1) >> m;
226 v |= 0x40000000; 378 p += 5;
227 SetBe32(p - 4, v); 379 if (((t >> m) & (0x70 << 1)) == 0
380 && ((z - (0x5000000 << 1)) & (0xf000000 << 1)) == 0)
381 {
382 UInt32 v = (UInt32)((0x8fffff << 1) | 1) & z;
383 z ^= v;
384 #ifdef BR_IA64_NO_INLINE
385 v |= (v & ((UInt32)1 << (23 + 1))) >> 3;
386 {
387 UInt32 c = pc;
388 BR_CONVERT_VAL(v, c)
389 }
390 v &= (0x1fffff << 1) | 1;
391 #else
392 {
393 if (encoding)
394 {
395 // pc &= ~(0xc00000 << 1); // we just need to clear at least 2 bits
396 pc &= (0x1fffff << 1) | 1;
397 v += pc;
398 }
399 else
400 {
401 // pc |= 0xc00000 << 1; // we need to set at least 2 bits
402 pc |= ~(UInt32)((0x1fffff << 1) | 1);
403 v -= pc;
404 }
405 }
406 v &= ~(UInt32)(0x600000 << 1);
407 #endif
408 v += (0x700000 << 1);
409 v &= (0x8fffff << 1) | 1;
410 z |= v;
411 z <<= m;
412 SetUi32(p + 1 - 5, z)
413 }
414 m++;
415 }
416 while (m &= 3); // while (m < 4);
228 } 417 }
229 } 418 }
230} 419}
420Z7_BRANCH_FUNCS_IMP(IA64)