diff options
Diffstat (limited to 'C/Bra.c')
-rw-r--r-- | C/Bra.c | 496 |
1 files changed, 343 insertions, 153 deletions
@@ -1,230 +1,420 @@ | |||
1 | /* Bra.c -- Converters for RISC code | 1 | /* Bra.c -- Branch converters for RISC code |
2 | 2021-02-09 : Igor Pavlov : Public domain */ | 2 | 2023-04-02 : Igor Pavlov : Public domain */ |
3 | 3 | ||
4 | #include "Precomp.h" | 4 | #include "Precomp.h" |
5 | 5 | ||
6 | #include "CpuArch.h" | ||
7 | #include "Bra.h" | 6 | #include "Bra.h" |
7 | #include "CpuArch.h" | ||
8 | #include "RotateDefs.h" | ||
9 | |||
10 | #if defined(MY_CPU_SIZEOF_POINTER) \ | ||
11 | && ( MY_CPU_SIZEOF_POINTER == 4 \ | ||
12 | || MY_CPU_SIZEOF_POINTER == 8) | ||
13 | #define BR_CONV_USE_OPT_PC_PTR | ||
14 | #endif | ||
15 | |||
16 | #ifdef BR_CONV_USE_OPT_PC_PTR | ||
17 | #define BR_PC_INIT pc -= (UInt32)(SizeT)p; | ||
18 | #define BR_PC_GET (pc + (UInt32)(SizeT)p) | ||
19 | #else | ||
20 | #define BR_PC_INIT pc += (UInt32)size; | ||
21 | #define BR_PC_GET (pc - (UInt32)(SizeT)(lim - p)) | ||
22 | // #define BR_PC_INIT | ||
23 | // #define BR_PC_GET (pc + (UInt32)(SizeT)(p - data)) | ||
24 | #endif | ||
25 | |||
26 | #define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c; | ||
27 | // #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c; | ||
28 | |||
29 | #define Z7_BRANCH_CONV(name) z7_BranchConv_ ## name | ||
30 | |||
31 | #define Z7_BRANCH_FUNC_MAIN(name) \ | ||
32 | static \ | ||
33 | Z7_FORCE_INLINE \ | ||
34 | Z7_ATTRIB_NO_VECTOR \ | ||
35 | Byte *Z7_BRANCH_CONV(name)(Byte *p, SizeT size, UInt32 pc, int encoding) | ||
8 | 36 | ||
9 | SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) | 37 | #define Z7_BRANCH_FUNC_IMP(name, m, encoding) \ |
38 | Z7_NO_INLINE \ | ||
39 | Z7_ATTRIB_NO_VECTOR \ | ||
40 | Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \ | ||
41 | { return Z7_BRANCH_CONV(name)(data, size, pc, encoding); } \ | ||
42 | |||
43 | #ifdef Z7_EXTRACT_ONLY | ||
44 | #define Z7_BRANCH_FUNCS_IMP(name) \ | ||
45 | Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0) | ||
46 | #else | ||
47 | #define Z7_BRANCH_FUNCS_IMP(name) \ | ||
48 | Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC, 0) \ | ||
49 | Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC, 1) | ||
50 | #endif | ||
51 | |||
52 | #if defined(__clang__) | ||
53 | #define BR_EXTERNAL_FOR | ||
54 | #define BR_NEXT_ITERATION continue; | ||
55 | #else | ||
56 | #define BR_EXTERNAL_FOR for (;;) | ||
57 | #define BR_NEXT_ITERATION break; | ||
58 | #endif | ||
59 | |||
60 | #if defined(__clang__) && (__clang_major__ >= 8) \ | ||
61 | || defined(__GNUC__) && (__GNUC__ >= 1000) \ | ||
62 | // GCC is not good for __builtin_expect() here | ||
63 | /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */ | ||
64 | // #define Z7_unlikely [[unlikely]] | ||
65 | // #define Z7_LIKELY(x) (__builtin_expect((x), 1)) | ||
66 | #define Z7_UNLIKELY(x) (__builtin_expect((x), 0)) | ||
67 | // #define Z7_likely [[likely]] | ||
68 | #else | ||
69 | // #define Z7_LIKELY(x) (x) | ||
70 | #define Z7_UNLIKELY(x) (x) | ||
71 | // #define Z7_likely | ||
72 | #endif | ||
73 | |||
74 | |||
75 | Z7_BRANCH_FUNC_MAIN(ARM64) | ||
10 | { | 76 | { |
11 | Byte *p; | 77 | // Byte *p = data; |
12 | const Byte *lim; | 78 | const Byte *lim; |
13 | size &= ~(size_t)3; | 79 | const UInt32 flag = (UInt32)1 << (24 - 4); |
14 | ip += 4; | 80 | const UInt32 mask = ((UInt32)1 << 24) - (flag << 1); |
15 | p = data; | 81 | size &= ~(SizeT)3; |
16 | lim = data + size; | 82 | // if (size == 0) return p; |
83 | lim = p + size; | ||
84 | BR_PC_INIT | ||
85 | pc -= 4; // because (p) will point to next instruction | ||
86 | |||
87 | BR_EXTERNAL_FOR | ||
88 | { | ||
89 | // Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE | ||
90 | for (;;) | ||
91 | { | ||
92 | UInt32 v; | ||
93 | if Z7_UNLIKELY(p == lim) | ||
94 | return p; | ||
95 | v = GetUi32a(p); | ||
96 | p += 4; | ||
97 | if Z7_UNLIKELY(((v - 0x94000000) & 0xfc000000) == 0) | ||
98 | { | ||
99 | UInt32 c = BR_PC_GET >> 2; | ||
100 | BR_CONVERT_VAL(v, c) | ||
101 | v &= 0x03ffffff; | ||
102 | v |= 0x94000000; | ||
103 | SetUi32a(p - 4, v) | ||
104 | BR_NEXT_ITERATION | ||
105 | } | ||
106 | // v = rotlFixed(v, 8); v += (flag << 8) - 0x90; if Z7_UNLIKELY((v & ((mask << 8) + 0x9f)) == 0) | ||
107 | v -= 0x90000000; if Z7_UNLIKELY((v & 0x9f000000) == 0) | ||
108 | { | ||
109 | UInt32 z, c; | ||
110 | // v = rotrFixed(v, 8); | ||
111 | v += flag; if Z7_UNLIKELY(v & mask) continue; | ||
112 | z = (v & 0xffffffe0) | (v >> 26); | ||
113 | c = (BR_PC_GET >> (12 - 3)) & ~(UInt32)7; | ||
114 | BR_CONVERT_VAL(z, c) | ||
115 | v &= 0x1f; | ||
116 | v |= 0x90000000; | ||
117 | v |= z << 26; | ||
118 | v |= 0x00ffffe0 & ((z & (((flag << 1) - 1))) - flag); | ||
119 | SetUi32a(p - 4, v) | ||
120 | } | ||
121 | } | ||
122 | } | ||
123 | } | ||
124 | Z7_BRANCH_FUNCS_IMP(ARM64) | ||
17 | 125 | ||
18 | if (encoding) | ||
19 | 126 | ||
127 | Z7_BRANCH_FUNC_MAIN(ARM) | ||
128 | { | ||
129 | // Byte *p = data; | ||
130 | const Byte *lim; | ||
131 | size &= ~(SizeT)3; | ||
132 | lim = p + size; | ||
133 | BR_PC_INIT | ||
134 | /* in ARM: the branch offset is relative to the address of the instruction 2 instructions after the current one. | ||
135 | (p) will point to the next instruction */ | ||
136 | pc += 8 - 4; | ||
137 | |||
20 | for (;;) | 138 | for (;;) |
21 | { | 139 | { |
22 | for (;;) | 140 | for (;;) |
23 | { | 141 | { |
24 | if (p >= lim) | 142 | if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break; |
25 | return (SizeT)(p - data); | 143 | if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break; |
26 | p += 4; | ||
27 | if (p[-1] == 0xEB) | ||
28 | break; | ||
29 | } | 144 | } |
30 | { | 145 | { |
31 | UInt32 v = GetUi32(p - 4); | 146 | UInt32 v = GetUi32a(p - 4); |
32 | v <<= 2; | 147 | UInt32 c = BR_PC_GET >> 2; |
33 | v += ip + (UInt32)(p - data); | 148 | BR_CONVERT_VAL(v, c) |
34 | v >>= 2; | 149 | v &= 0x00ffffff; |
35 | v &= 0x00FFFFFF; | 150 | v |= 0xeb000000; |
36 | v |= 0xEB000000; | 151 | SetUi32a(p - 4, v) |
37 | SetUi32(p - 4, v); | ||
38 | } | 152 | } |
39 | } | 153 | } |
154 | } | ||
155 | Z7_BRANCH_FUNCS_IMP(ARM) | ||
156 | |||
40 | 157 | ||
158 | Z7_BRANCH_FUNC_MAIN(PPC) | ||
159 | { | ||
160 | // Byte *p = data; | ||
161 | const Byte *lim; | ||
162 | size &= ~(SizeT)3; | ||
163 | lim = p + size; | ||
164 | BR_PC_INIT | ||
165 | pc -= 4; // because (p) will point to next instruction | ||
166 | |||
41 | for (;;) | 167 | for (;;) |
42 | { | 168 | { |
169 | UInt32 v; | ||
43 | for (;;) | 170 | for (;;) |
44 | { | 171 | { |
45 | if (p >= lim) | 172 | if Z7_UNLIKELY(p == lim) |
46 | return (SizeT)(p - data); | 173 | return p; |
174 | // v = GetBe32a(p); | ||
175 | v = *(UInt32 *)(void *)p; | ||
47 | p += 4; | 176 | p += 4; |
48 | if (p[-1] == 0xEB) | 177 | // if ((v & 0xfc000003) == 0x48000001) break; |
49 | break; | 178 | // if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) break; |
179 | if Z7_UNLIKELY( | ||
180 | ((v - Z7_CONV_BE_TO_NATIVE_CONST32(0x48000001)) | ||
181 | & Z7_CONV_BE_TO_NATIVE_CONST32(0xfc000003)) == 0) break; | ||
50 | } | 182 | } |
51 | { | 183 | { |
52 | UInt32 v = GetUi32(p - 4); | 184 | v = Z7_CONV_NATIVE_TO_BE_32(v); |
53 | v <<= 2; | 185 | { |
54 | v -= ip + (UInt32)(p - data); | 186 | UInt32 c = BR_PC_GET; |
55 | v >>= 2; | 187 | BR_CONVERT_VAL(v, c) |
56 | v &= 0x00FFFFFF; | 188 | } |
57 | v |= 0xEB000000; | 189 | v &= 0x03ffffff; |
58 | SetUi32(p - 4, v); | 190 | v |= 0x48000000; |
191 | SetBe32a(p - 4, v) | ||
59 | } | 192 | } |
60 | } | 193 | } |
61 | } | 194 | } |
195 | Z7_BRANCH_FUNCS_IMP(PPC) | ||
62 | 196 | ||
63 | 197 | ||
64 | SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) | 198 | #ifdef Z7_CPU_FAST_ROTATE_SUPPORTED |
199 | #define BR_SPARC_USE_ROTATE | ||
200 | #endif | ||
201 | |||
202 | Z7_BRANCH_FUNC_MAIN(SPARC) | ||
65 | { | 203 | { |
66 | Byte *p; | 204 | // Byte *p = data; |
67 | const Byte *lim; | 205 | const Byte *lim; |
68 | size &= ~(size_t)1; | 206 | const UInt32 flag = (UInt32)1 << 22; |
69 | p = data; | 207 | size &= ~(SizeT)3; |
70 | lim = data + size - 4; | 208 | lim = p + size; |
71 | 209 | BR_PC_INIT | |
72 | if (encoding) | 210 | pc -= 4; // because (p) will point to next instruction |
73 | |||
74 | for (;;) | 211 | for (;;) |
75 | { | 212 | { |
76 | UInt32 b1; | 213 | UInt32 v; |
77 | for (;;) | 214 | for (;;) |
78 | { | 215 | { |
79 | UInt32 b3; | 216 | if Z7_UNLIKELY(p == lim) |
80 | if (p > lim) | 217 | return p; |
81 | return (SizeT)(p - data); | 218 | /* // the code without GetBe32a(): |
82 | b1 = p[1]; | 219 | { const UInt32 v = GetUi16a(p) & 0xc0ff; p += 4; if (v == 0x40 || v == 0xc07f) break; } |
83 | b3 = p[3]; | 220 | */ |
84 | p += 2; | 221 | v = GetBe32a(p); |
85 | b1 ^= 8; | 222 | p += 4; |
86 | if ((b3 & b1) >= 0xF8) | 223 | #ifdef BR_SPARC_USE_ROTATE |
224 | v = rotlFixed(v, 2); | ||
225 | v += (flag << 2) - 1; | ||
226 | if Z7_UNLIKELY((v & (3 - (flag << 3))) == 0) | ||
227 | #else | ||
228 | v += (UInt32)5 << 29; | ||
229 | v ^= (UInt32)7 << 29; | ||
230 | v += flag; | ||
231 | if Z7_UNLIKELY((v & (0 - (flag << 1))) == 0) | ||
232 | #endif | ||
87 | break; | 233 | break; |
88 | } | 234 | } |
89 | { | 235 | { |
90 | UInt32 v = | 236 | // UInt32 v = GetBe32a(p - 4); |
91 | ((UInt32)b1 << 19) | 237 | #ifndef BR_SPARC_USE_ROTATE |
92 | + (((UInt32)p[1] & 0x7) << 8) | 238 | v <<= 2; |
93 | + (((UInt32)p[-2] << 11)) | 239 | #endif |
94 | + (p[0]); | ||
95 | |||
96 | p += 2; | ||
97 | { | 240 | { |
98 | UInt32 cur = (ip + (UInt32)(p - data)) >> 1; | 241 | UInt32 c = BR_PC_GET; |
99 | v += cur; | 242 | BR_CONVERT_VAL(v, c) |
100 | } | 243 | } |
101 | 244 | v &= (flag << 3) - 1; | |
102 | p[-4] = (Byte)(v >> 11); | 245 | #ifdef BR_SPARC_USE_ROTATE |
103 | p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7)); | 246 | v -= (flag << 2) - 1; |
104 | p[-2] = (Byte)v; | 247 | v = rotrFixed(v, 2); |
105 | p[-1] = (Byte)(0xF8 | (v >> 8)); | 248 | #else |
249 | v -= (flag << 2); | ||
250 | v >>= 2; | ||
251 | v |= (UInt32)1 << 30; | ||
252 | #endif | ||
253 | SetBe32a(p - 4, v) | ||
106 | } | 254 | } |
107 | } | 255 | } |
256 | } | ||
257 | Z7_BRANCH_FUNCS_IMP(SPARC) | ||
258 | |||
259 | |||
260 | Z7_BRANCH_FUNC_MAIN(ARMT) | ||
261 | { | ||
262 | // Byte *p = data; | ||
263 | Byte *lim; | ||
264 | size &= ~(SizeT)1; | ||
265 | // if (size == 0) return p; | ||
266 | if (size <= 2) return p; | ||
267 | size -= 2; | ||
268 | lim = p + size; | ||
269 | BR_PC_INIT | ||
270 | /* in ARM: the branch offset is relative to the address of the instruction 2 instructions after the current one. | ||
271 | (p) will point to the instruction 2 instructions after the current one */ | ||
272 | // pc += 4 - 4; | ||
273 | // if (encoding) pc -= 0xf800 << 1; else pc += 0xf800 << 1; | ||
274 | // #define ARMT_TAIL_PROC { goto armt_tail; } | ||
275 | #define ARMT_TAIL_PROC { return p; } | ||
108 | 276 | ||
109 | for (;;) | 277 | do |
110 | { | 278 | { |
111 | UInt32 b1; | 279 | /* in MSVC 32-bit x86 compilers: |
280 | UInt32 version : it loads value from memory with movzx | ||
281 | Byte version : it loads value to 8-bit register (AL/CL) | ||
282 | movzx version is slightly faster in some cpus | ||
283 | */ | ||
284 | unsigned b1; | ||
285 | // Byte / unsigned | ||
286 | b1 = p[1]; | ||
287 | // optimized version to reduce one (p >= lim) check: | ||
288 | // unsigned a1 = p[1]; b1 = p[3]; p += 2; if Z7_LIKELY((b1 & (a1 ^ 8)) < 0xf8) | ||
112 | for (;;) | 289 | for (;;) |
113 | { | 290 | { |
114 | UInt32 b3; | 291 | unsigned b3; // Byte / UInt32 |
115 | if (p > lim) | 292 | /* (Byte)(b3) normalization can use low byte computations in MSVC. |
116 | return (SizeT)(p - data); | 293 | It gives smaller code, and no loss of speed in some compilers/cpus. |
117 | b1 = p[1]; | 294 | But new MSVC 32-bit x86 compilers use a slower load |
118 | b3 = p[3]; | 295 | from memory into a low-byte register in that case. |
119 | p += 2; | 296 | So we try to use full 32-bit computations for faster code. |
120 | b1 ^= 8; | 297 | */ |
121 | if ((b3 & b1) >= 0xF8) | 298 | // if (p >= lim) { ARMT_TAIL_PROC } b3 = b1 + 8; b1 = p[3]; p += 2; if ((b3 & b1) >= 0xf8) break; |
122 | break; | 299 | if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b3 = p[3]; p += 2; if Z7_UNLIKELY((b3 & (b1 ^ 8)) >= 0xf8) break; |
300 | if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b1 = p[3]; p += 2; if Z7_UNLIKELY((b1 & (b3 ^ 8)) >= 0xf8) break; | ||
123 | } | 301 | } |
124 | { | 302 | { |
303 | /* we can adjust pc for (0xf800) to get rid of the (& 0x7FF) operation. | ||
304 | But gcc/clang for arm64 can use the bfi instruction for the full code here */ | ||
125 | UInt32 v = | 305 | UInt32 v = |
126 | ((UInt32)b1 << 19) | 306 | ((UInt32)GetUi16a(p - 2) << 11) | |
307 | ((UInt32)GetUi16a(p) & 0x7FF); | ||
308 | /* | ||
309 | UInt32 v = | ||
310 | ((UInt32)p[1 - 2] << 19) | ||
127 | + (((UInt32)p[1] & 0x7) << 8) | 311 | + (((UInt32)p[1] & 0x7) << 8) |
128 | + (((UInt32)p[-2] << 11)) | 312 | + (((UInt32)p[-2] << 11)) |
129 | + (p[0]); | 313 | + (p[0]); |
130 | 314 | */ | |
131 | p += 2; | 315 | p += 2; |
132 | { | 316 | { |
133 | UInt32 cur = (ip + (UInt32)(p - data)) >> 1; | 317 | UInt32 c = BR_PC_GET >> 1; |
134 | v -= cur; | 318 | BR_CONVERT_VAL(v, c) |
135 | } | 319 | } |
136 | 320 | SetUi16a(p - 4, (UInt16)(((v >> 11) & 0x7ff) | 0xf000)) | |
321 | SetUi16a(p - 2, (UInt16)(v | 0xf800)) | ||
137 | /* | 322 | /* |
138 | SetUi16(p - 4, (UInt16)(((v >> 11) & 0x7FF) | 0xF000)); | ||
139 | SetUi16(p - 2, (UInt16)(v | 0xF800)); | ||
140 | */ | ||
141 | |||
142 | p[-4] = (Byte)(v >> 11); | 323 | p[-4] = (Byte)(v >> 11); |
143 | p[-3] = (Byte)(0xF0 | ((v >> 19) & 0x7)); | 324 | p[-3] = (Byte)(0xf0 | ((v >> 19) & 0x7)); |
144 | p[-2] = (Byte)v; | 325 | p[-2] = (Byte)v; |
145 | p[-1] = (Byte)(0xF8 | (v >> 8)); | 326 | p[-1] = (Byte)(0xf8 | (v >> 8)); |
327 | */ | ||
146 | } | 328 | } |
147 | } | 329 | } |
330 | while (p < lim); | ||
331 | return p; | ||
332 | // armt_tail: | ||
333 | // if ((Byte)((lim[1] & 0xf8)) != 0xf0) { lim += 2; } return lim; | ||
334 | // return (Byte *)(lim + ((Byte)((lim[1] ^ 0xf0) & 0xf8) == 0 ? 0 : 2)); | ||
335 | // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2)); | ||
336 | // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2)); | ||
148 | } | 337 | } |
338 | Z7_BRANCH_FUNCS_IMP(ARMT) | ||
149 | 339 | ||
150 | 340 | ||
151 | SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) | 341 | // #define BR_IA64_NO_INLINE |
152 | { | ||
153 | Byte *p; | ||
154 | const Byte *lim; | ||
155 | size &= ~(size_t)3; | ||
156 | ip -= 4; | ||
157 | p = data; | ||
158 | lim = data + size; | ||
159 | |||
160 | for (;;) | ||
161 | { | ||
162 | for (;;) | ||
163 | { | ||
164 | if (p >= lim) | ||
165 | return (SizeT)(p - data); | ||
166 | p += 4; | ||
167 | /* if ((v & 0xFC000003) == 0x48000001) */ | ||
168 | if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) | ||
169 | break; | ||
170 | } | ||
171 | { | ||
172 | UInt32 v = GetBe32(p - 4); | ||
173 | if (encoding) | ||
174 | v += ip + (UInt32)(p - data); | ||
175 | else | ||
176 | v -= ip + (UInt32)(p - data); | ||
177 | v &= 0x03FFFFFF; | ||
178 | v |= 0x48000000; | ||
179 | SetBe32(p - 4, v); | ||
180 | } | ||
181 | } | ||
182 | } | ||
183 | |||
184 | 342 | ||
185 | SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding) | 343 | Z7_BRANCH_FUNC_MAIN(IA64) |
186 | { | 344 | { |
187 | Byte *p; | 345 | // Byte *p = data; |
188 | const Byte *lim; | 346 | const Byte *lim; |
189 | size &= ~(size_t)3; | 347 | size &= ~(SizeT)15; |
190 | ip -= 4; | 348 | lim = p + size; |
191 | p = data; | 349 | pc -= 1 << 4; |
192 | lim = data + size; | 350 | pc >>= 4 - 1; |
193 | 351 | // pc -= 1 << 1; | |
352 | |||
194 | for (;;) | 353 | for (;;) |
195 | { | 354 | { |
355 | unsigned m; | ||
196 | for (;;) | 356 | for (;;) |
197 | { | 357 | { |
198 | if (p >= lim) | 358 | if Z7_UNLIKELY(p == lim) |
199 | return (SizeT)(p - data); | 359 | return p; |
200 | /* | 360 | m = (unsigned)((UInt32)0x334b0000 >> (*p & 0x1e)); |
201 | v = GetBe32(p); | 361 | p += 16; |
202 | p += 4; | 362 | pc += 1 << 1; |
203 | m = v + ((UInt32)5 << 29); | 363 | if (m &= 3) |
204 | m ^= (UInt32)7 << 29; | ||
205 | m += (UInt32)1 << 22; | ||
206 | if ((m & ((UInt32)0x1FF << 23)) == 0) | ||
207 | break; | ||
208 | */ | ||
209 | p += 4; | ||
210 | if ((p[-4] == 0x40 && (p[-3] & 0xC0) == 0) || | ||
211 | (p[-4] == 0x7F && (p[-3] >= 0xC0))) | ||
212 | break; | 364 | break; |
213 | } | 365 | } |
214 | { | 366 | { |
215 | UInt32 v = GetBe32(p - 4); | 367 | p += (ptrdiff_t)m * 5 - 20; // negative value is expected here. |
216 | v <<= 2; | 368 | do |
217 | if (encoding) | 369 | { |
218 | v += ip + (UInt32)(p - data); | 370 | const UInt32 t = |
219 | else | 371 | #if defined(MY_CPU_X86_OR_AMD64) |
220 | v -= ip + (UInt32)(p - data); | 372 | // we use 32-bit load here to reduce code size on x86: |
221 | 373 | GetUi32(p); | |
222 | v &= 0x01FFFFFF; | 374 | #else |
223 | v -= (UInt32)1 << 24; | 375 | GetUi16(p); |
224 | v ^= 0xFF000000; | 376 | #endif |
225 | v >>= 2; | 377 | UInt32 z = GetUi32(p + 1) >> m; |
226 | v |= 0x40000000; | 378 | p += 5; |
227 | SetBe32(p - 4, v); | 379 | if (((t >> m) & (0x70 << 1)) == 0 |
380 | && ((z - (0x5000000 << 1)) & (0xf000000 << 1)) == 0) | ||
381 | { | ||
382 | UInt32 v = (UInt32)((0x8fffff << 1) | 1) & z; | ||
383 | z ^= v; | ||
384 | #ifdef BR_IA64_NO_INLINE | ||
385 | v |= (v & ((UInt32)1 << (23 + 1))) >> 3; | ||
386 | { | ||
387 | UInt32 c = pc; | ||
388 | BR_CONVERT_VAL(v, c) | ||
389 | } | ||
390 | v &= (0x1fffff << 1) | 1; | ||
391 | #else | ||
392 | { | ||
393 | if (encoding) | ||
394 | { | ||
395 | // pc &= ~(0xc00000 << 1); // we just need to clear at least 2 bits | ||
396 | pc &= (0x1fffff << 1) | 1; | ||
397 | v += pc; | ||
398 | } | ||
399 | else | ||
400 | { | ||
401 | // pc |= 0xc00000 << 1; // we need to set at least 2 bits | ||
402 | pc |= ~(UInt32)((0x1fffff << 1) | 1); | ||
403 | v -= pc; | ||
404 | } | ||
405 | } | ||
406 | v &= ~(UInt32)(0x600000 << 1); | ||
407 | #endif | ||
408 | v += (0x700000 << 1); | ||
409 | v &= (0x8fffff << 1) | 1; | ||
410 | z |= v; | ||
411 | z <<= m; | ||
412 | SetUi32(p + 1 - 5, z) | ||
413 | } | ||
414 | m++; | ||
415 | } | ||
416 | while (m &= 3); // while (m < 4); | ||
228 | } | 417 | } |
229 | } | 418 | } |
230 | } | 419 | } |
420 | Z7_BRANCH_FUNCS_IMP(IA64) | ||