diff options
Diffstat (limited to 'src/lj_emit_arm64.h')
-rw-r--r-- | src/lj_emit_arm64.h | 419 |
1 files changed, 419 insertions, 0 deletions
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h new file mode 100644 index 00000000..f09c0f3a --- /dev/null +++ b/src/lj_emit_arm64.h | |||
@@ -0,0 +1,419 @@ | |||
1 | /* | ||
2 | ** ARM64 instruction emitter. | ||
3 | ** Copyright (C) 2005-2020 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. | ||
6 | ** Sponsored by Cisco Systems, Inc. | ||
7 | */ | ||
8 | |||
9 | /* -- Constant encoding --------------------------------------------------- */ | ||
10 | |||
11 | static uint64_t get_k64val(IRIns *ir) | ||
12 | { | ||
13 | if (ir->o == IR_KINT64) { | ||
14 | return ir_kint64(ir)->u64; | ||
15 | } else if (ir->o == IR_KGC) { | ||
16 | return (uint64_t)ir_kgc(ir); | ||
17 | } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { | ||
18 | return (uint64_t)ir_kptr(ir); | ||
19 | } else { | ||
20 | lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); | ||
21 | return ir->i; /* Sign-extended. */ | ||
22 | } | ||
23 | } | ||
24 | |||
25 | /* Encode constant in K12 format for data processing instructions. */ | ||
26 | static uint32_t emit_isk12(int64_t n) | ||
27 | { | ||
28 | uint64_t k = (n < 0) ? -n : n; | ||
29 | uint32_t m = (n < 0) ? 0x40000000 : 0; | ||
30 | if (k < 0x1000) { | ||
31 | return A64I_K12|m|A64F_U12(k); | ||
32 | } else if ((k & 0xfff000) == k) { | ||
33 | return A64I_K12|m|0x400000|A64F_U12(k>>12); | ||
34 | } | ||
35 | return 0; | ||
36 | } | ||
37 | |||
/* Count leading/trailing zero bits of a 64 bit value via compiler builtins.
** NOTE(review): these builtins are documented as undefined for n == 0, so
** callers must only pass non-zero values -- confirm for new call sites.
*/
#define emit_clz64(n) __builtin_clzll(n)
#define emit_ctz64(n) __builtin_ctzll(n)
40 | |||
/* Encode constant in K13 format for logical data processing instructions.
**
** Returns 0 if n cannot be encoded. Otherwise returns A64I_K13 combined with
** two fields, one placed at bit 16 and one at bit 10. If is64 is zero, only
** the low 32 bits of n take part in the encoding.
*/
static uint32_t emit_isk13(uint64_t n, int is64)
{
  /* inv: n was complemented. w: width bias used by the field computation
  ** below (NOTE(review): the 128/64 start values presumably fold the 64 bit
  ** pattern flag into the upper field -- confirm against the ISA manual).
  */
  int inv = 0, w = 128, lz, tz;
  if (n & 1) { n = ~n; w = 64; inv = 1; }  /* Avoid wrap-around of ones. */
  if (!n) return 0;  /* Neither all-zero nor all-ones are allowed. */
  do {  /* Find the repeat width. */
    /* Halves differ in 64 bit mode: keep the full width and stop. */
    if (is64 && (uint32_t)(n^(n>>32))) break;
    n = (uint32_t)n;
    if (!n) return 0;  /* Ditto when passing n=0xffffffff and is64=0. */
    /* Keep halving while both halves of the current element are equal. */
    w = 32; if ((n^(n>>16)) & 0xffff) break;
    n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break;
    n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break;
    n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break;
    n = n & 0x3; w = 2;
  } while (0);
  /* n is non-zero here, so both bit counts are well-defined. */
  lz = emit_clz64(n);
  tz = emit_ctz64(n);
  /* Shift the topmost set bit into the sign position, then shift right
  ** arithmetically: only a single run of ones leaves all bits set.
  */
  if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0;  /* Non-contiguous? */
  if (inv)
    return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10);
  else
    return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10);
}
65 | |||
/* Check whether the bit pattern of a 64 bit FP constant can be packed into
** 8 bits. Returns the packed value (bit 7 from bit 63, bits 0-6 from bits
** 48-54) or ~0u if the pattern does not qualify.
*/
static uint32_t emit_isfpk64(uint64_t n)
{
  uint64_t exp9 = (n >> 54) & 0x1ff;  /* Bits 54..62. */
  int lowclear = ((n << 16) == 0);  /* Bits 0..47 must be zero. */
  int expok = (exp9 == 0x100 || exp9 == 0x0ff);
  uint32_t hi, lo;
  if (!lowclear || !expok)
    return ~0u;
  hi = (uint32_t)((n >> 56) & 0x80);
  lo = (uint32_t)((n >> 48) & 0x7f);
  return lo | hi;
}
74 | |||
75 | /* -- Emit basic instructions --------------------------------------------- */ | ||
76 | |||
77 | static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra) | ||
78 | { | ||
79 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm) | A64F_A(ra); | ||
80 | } | ||
81 | |||
82 | static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm) | ||
83 | { | ||
84 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm); | ||
85 | } | ||
86 | |||
87 | static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm) | ||
88 | { | ||
89 | *--as->mcp = ai | A64F_D(rd) | A64F_M(rm); | ||
90 | } | ||
91 | |||
92 | static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn) | ||
93 | { | ||
94 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn); | ||
95 | } | ||
96 | |||
97 | static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm) | ||
98 | { | ||
99 | *--as->mcp = ai | A64F_N(rn) | A64F_M(rm); | ||
100 | } | ||
101 | |||
102 | static void emit_d(ASMState *as, A64Ins ai, Reg rd) | ||
103 | { | ||
104 | *--as->mcp = ai | A64F_D(rd); | ||
105 | } | ||
106 | |||
107 | static void emit_n(ASMState *as, A64Ins ai, Reg rn) | ||
108 | { | ||
109 | *--as->mcp = ai | A64F_N(rn); | ||
110 | } | ||
111 | |||
112 | static int emit_checkofs(A64Ins ai, int64_t ofs) | ||
113 | { | ||
114 | int scale = (ai >> 30) & 3; | ||
115 | if (ofs < 0 || (ofs & ((1<<scale)-1))) { | ||
116 | return (ofs >= -256 && ofs <= 255) ? -1 : 0; | ||
117 | } else { | ||
118 | return (ofs < (4096<<scale)) ? 1 : 0; | ||
119 | } | ||
120 | } | ||
121 | |||
122 | static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) | ||
123 | { | ||
124 | int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3; | ||
125 | lua_assert(ot); | ||
126 | /* Combine LDR/STR pairs to LDP/STP. */ | ||
127 | if ((sc == 2 || sc == 3) && | ||
128 | (!(ai & 0x400000) || rd != rn) && | ||
129 | as->mcp != as->mcloop) { | ||
130 | uint32_t prev = *as->mcp & ~A64F_D(31); | ||
131 | int ofsm = ofs - (1<<sc), ofsp = ofs + (1<<sc); | ||
132 | A64Ins aip; | ||
133 | if (prev == (ai | A64F_N(rn) | A64F_U12(ofsm>>sc)) || | ||
134 | prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) { | ||
135 | aip = (A64F_A(rd) | A64F_D(*as->mcp & 31)); | ||
136 | } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) || | ||
137 | prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) { | ||
138 | aip = (A64F_D(rd) | A64F_A(*as->mcp & 31)); | ||
139 | ofsm = ofs; | ||
140 | } else { | ||
141 | goto nopair; | ||
142 | } | ||
143 | if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (63<<sc)) { | ||
144 | *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) | | ||
145 | (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000)); | ||
146 | return; | ||
147 | } | ||
148 | } | ||
149 | nopair: | ||
150 | if (ot == 1) | ||
151 | *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_U12(ofs >> sc); | ||
152 | else | ||
153 | *--as->mcp = (ai^A64I_LS_U) | A64F_D(rd) | A64F_N(rn) | A64F_S9(ofs & 0x1ff); | ||
154 | } | ||
155 | |||
156 | /* -- Emit loads/stores --------------------------------------------------- */ | ||
157 | |||
/* Prefer rematerialization of BASE/L from global_State over spills.
** True for every reference at or below ASMREF_L.
*/
#define emit_canremat(ref) ((ref) <= ASMREF_L)
160 | |||
161 | /* Try to find an N-step delta relative to other consts with N < lim. */ | ||
162 | static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) | ||
163 | { | ||
164 | RegSet work = ~as->freeset & RSET_GPR; | ||
165 | if (lim <= 1) return 0; /* Can't beat that. */ | ||
166 | while (work) { | ||
167 | Reg r = rset_picktop(work); | ||
168 | IRRef ref = regcost_ref(as->cost[r]); | ||
169 | lua_assert(r != rd); | ||
170 | if (ref < REF_TRUE) { | ||
171 | uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) : | ||
172 | get_k64val(IR(ref)); | ||
173 | int64_t delta = (int64_t)(k - kx); | ||
174 | if (delta == 0) { | ||
175 | emit_dm(as, A64I_MOVx, rd, r); | ||
176 | return 1; | ||
177 | } else { | ||
178 | uint32_t k12 = emit_isk12(delta < 0 ? -delta : delta); | ||
179 | if (k12) { | ||
180 | emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r); | ||
181 | return 1; | ||
182 | } | ||
183 | /* Do other ops or multi-step deltas pay off? Probably not. | ||
184 | ** E.g. XOR rarely helps with pointer consts. | ||
185 | */ | ||
186 | } | ||
187 | } | ||
188 | rset_clear(work, r); | ||
189 | } | ||
190 | return 0; /* Failed. */ | ||
191 | } | ||
192 | |||
/* Load a constant into GPR rd.
**
** is64 is OR'ed into the emitted opcodes; the wrapper macros pass either 0
** or A64I_X. Tried in this order: a single bitmask-immediate ORR from the
** zero register, a one-instruction delta from a constant already held in
** another register (emit_kdelta), and finally a MOVZ/MOVN followed by MOVKs.
** Code is emitted backwards, so the MOVZ/MOVN emitted last executes first.
*/
static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
{
  uint32_t k13 = emit_isk13(u64, is64);
  if (k13) {  /* Can the constant be represented as a bitmask immediate? */
    emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
  } else {
    int i, zeros = 0, ones = 0, neg;
    if (!is64) u64 = (int64_t)(int32_t)u64;  /* Sign-extend. */
    /* Count homogeneous 16 bit fragments. */
    for (i = 0; i < 4; i++) {
      uint64_t frag = (u64 >> i*16) & 0xffff;
      zeros += (frag == 0);
      ones += (frag == 0xffff);
    }
    neg = ones > zeros;  /* Use MOVN if it pays off. */
    /* The limit is the number of fragments MOVZ/MOVN + MOVK would need. */
    if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
      int shift = 0, lshift = 0;
      uint64_t n64 = neg ? ~u64 : u64;
      if (n64 != 0) {
        /* Find first/last fragment to be filled. */
        shift = (63-emit_clz64(n64)) & ~15;
        lshift = emit_ctz64(n64) & ~15;
      }
      /* MOVK requires the original value (u64). */
      while (shift > lshift) {  /* From the highest fragment downwards. */
        uint32_t u16 = (u64 >> shift) & 0xffff;
        /* Skip fragments that are correctly filled by MOVN/MOVZ. */
        if (u16 != (neg ? 0xffff : 0))
          emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
        shift -= 16;
      }
      /* But MOVN needs an inverted value (n64). */
      emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
                 A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
    }
  }
}
230 | |||
/* Load a 32 bit constant into a GPR. */
#define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0)

/* Load a 64 bit constant into a GPR. */
#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X)

/* Load an address into a GPR as a 64 bit constant. */
#define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr))

/* Byte offset of address k from the g member reached via J2GG(as->J). */
#define glofs(as, k) \
  ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g))
/* Byte offset of address k from as->mcp - 1, i.e. from the slot the next
** emitted instruction will occupy.
*/
#define mcpofs(as, k) \
  ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1)))
/* Does that offset, divided by 4, pass the A64F_S_OK check for 19 bits? */
#define checkmcpofs(as, k) \
  (A64F_S_OK(mcpofs(as, k)>>2, 19))

/* Forward declaration; the definition is not part of this file. */
static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
247 | |||
248 | /* Get/set from constant pointer. */ | ||
249 | static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p) | ||
250 | { | ||
251 | /* First, check if ip + offset is in range. */ | ||
252 | if ((ai & 0x00400000) && checkmcpofs(as, p)) { | ||
253 | emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r); | ||
254 | } else { | ||
255 | Reg base = RID_GL; /* Next, try GL + offset. */ | ||
256 | int64_t ofs = glofs(as, p); | ||
257 | if (!emit_checkofs(ai, ofs)) { /* Else split up into base reg + offset. */ | ||
258 | int64_t i64 = i64ptr(p); | ||
259 | base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r)); | ||
260 | ofs = i64 & 0x7fffull; | ||
261 | } | ||
262 | emit_lso(as, ai, r, base, ofs); | ||
263 | } | ||
264 | } | ||
265 | |||
266 | /* Load 64 bit IR constant into register. */ | ||
267 | static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) | ||
268 | { | ||
269 | const uint64_t *k = &ir_k64(ir)->u64; | ||
270 | int64_t ofs; | ||
271 | if (r >= RID_MAX_GPR) { | ||
272 | uint32_t fpk = emit_isfpk64(*k); | ||
273 | if (fpk != ~0u) { | ||
274 | emit_d(as, A64I_FMOV_DI | A64F_FP8(fpk), (r & 31)); | ||
275 | return; | ||
276 | } | ||
277 | } | ||
278 | ofs = glofs(as, k); | ||
279 | if (emit_checkofs(A64I_LDRx, ofs)) { | ||
280 | emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx, | ||
281 | (r & 31), RID_GL, ofs); | ||
282 | } else { | ||
283 | if (r >= RID_MAX_GPR) { | ||
284 | emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP); | ||
285 | r = RID_TMP; | ||
286 | } | ||
287 | if (checkmcpofs(as, k)) | ||
288 | emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r); | ||
289 | else | ||
290 | emit_loadu64(as, r, *k); | ||
291 | } | ||
292 | } | ||
293 | |||
/* Get/set global_State fields.
** Both expand to emit_lsptr() on the address of the named field, using
** A64I_LDRx for the load and A64I_STRx for the store.
*/
#define emit_getgl(as, r, field) \
  emit_lsptr(as, A64I_LDRx, (r), (void *)&J2G(as->J)->field)
#define emit_setgl(as, r, field) \
  emit_lsptr(as, A64I_STRx, (r), (void *)&J2G(as->J)->field)

/* Trace number is determined from pc of exit instruction.
** Hence nothing is emitted; the argument is only marked as unused.
*/
#define emit_setvmstate(as, i) UNUSED(i)
302 | |||
303 | /* -- Emit control-flow instructions -------------------------------------- */ | ||
304 | |||
/* Label for internal jumps: a plain pointer into the machine code. */
typedef MCode *MCLabel;

/* Return label pointing to current PC.
** This is the current code pointer, i.e. the most recently emitted
** instruction, since code is emitted backwards.
*/
#define emit_label(as) ((as)->mcp)
310 | |||
311 | static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target) | ||
312 | { | ||
313 | MCode *p = --as->mcp; | ||
314 | ptrdiff_t delta = target - p; | ||
315 | lua_assert(A64F_S_OK(delta, 19)); | ||
316 | *p = A64I_BCC | A64F_S19(delta) | cond; | ||
317 | } | ||
318 | |||
319 | static void emit_branch(ASMState *as, A64Ins ai, MCode *target) | ||
320 | { | ||
321 | MCode *p = --as->mcp; | ||
322 | ptrdiff_t delta = target - p; | ||
323 | lua_assert(A64F_S_OK(delta, 26)); | ||
324 | *p = ai | A64F_S26(delta); | ||
325 | } | ||
326 | |||
327 | static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target) | ||
328 | { | ||
329 | MCode *p = --as->mcp; | ||
330 | ptrdiff_t delta = target - p; | ||
331 | lua_assert(bit < 63 && A64F_S_OK(delta, 14)); | ||
332 | if (bit > 31) ai |= A64I_X; | ||
333 | *p = ai | A64F_BIT(bit & 31) | A64F_S14(delta) | r; | ||
334 | } | ||
335 | |||
336 | static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target) | ||
337 | { | ||
338 | MCode *p = --as->mcp; | ||
339 | ptrdiff_t delta = target - p; | ||
340 | lua_assert(A64F_S_OK(delta, 19)); | ||
341 | *p = ai | A64F_S19(delta) | r; | ||
342 | } | ||
343 | |||
/* Emit a branch to target using the A64I_B opcode. */
#define emit_jmp(as, target) emit_branch(as, A64I_B, (target))
345 | |||
346 | static void emit_call(ASMState *as, void *target) | ||
347 | { | ||
348 | MCode *p = --as->mcp; | ||
349 | ptrdiff_t delta = (char *)target - (char *)p; | ||
350 | if (A64F_S_OK(delta>>2, 26)) { | ||
351 | *p = A64I_BL | A64F_S26(delta>>2); | ||
352 | } else { /* Target out of range: need indirect call. But don't use R0-R7. */ | ||
353 | Reg r = ra_allock(as, i64ptr(target), | ||
354 | RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); | ||
355 | *p = A64I_BLR | A64F_N(r); | ||
356 | } | ||
357 | } | ||
358 | |||
359 | /* -- Emit generic operations --------------------------------------------- */ | ||
360 | |||
/* Generic move between two regs.
**
** Destinations at or above RID_MAX_GPR get an FP move whose width depends
** on the IR type. For GPRs a 64 bit register move is emitted. Before that,
** the most recently emitted instruction (the one executing right after the
** move) may be patched to use src instead of dst.
*/
static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
{
  if (dst >= RID_MAX_GPR) {
    emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D : A64I_FMOV_S,
            (dst & 31), (src & 31));
    return;
  }
  if (as->mcp != as->mcloop) {  /* Swap early registers for loads/stores. */
    /* XOR with swp turns a field that equals dst into src. */
    MCode ins = *as->mcp, swp = (src^dst);
    /* Opcode pattern check; presumably matches 32/64 bit LDR/STR with an
    ** unsigned offset -- TODO confirm against the opcode table.
    */
    if ((ins & 0xbf800000) == 0xb9000000) {
      if (!((ins ^ (dst << 5)) & 0x000003e0))
        *as->mcp = ins ^ (swp << 5);  /* Swap N in load/store. */
      /* NOTE(review): this rewrite starts from the unpatched ins, so if a
      ** store uses dst in both fields, the N swap above is overwritten.
      */
      if (!(ins & 0x00400000) && !((ins ^ dst) & 0x0000001f))
        *as->mcp = ins ^ swp;  /* Swap D in store. */
    }
  }
  emit_dm(as, A64I_MOVx, dst, src);
}
380 | |||
381 | /* Generic load of register with base and (small) offset address. */ | ||
382 | static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) | ||
383 | { | ||
384 | if (r >= RID_MAX_GPR) | ||
385 | emit_lso(as, irt_isnum(ir->t) ? A64I_LDRd : A64I_LDRs, (r & 31), base, ofs); | ||
386 | else | ||
387 | emit_lso(as, irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw, r, base, ofs); | ||
388 | } | ||
389 | |||
390 | /* Generic store of register with base and (small) offset address. */ | ||
391 | static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) | ||
392 | { | ||
393 | if (r >= RID_MAX_GPR) | ||
394 | emit_lso(as, irt_isnum(ir->t) ? A64I_STRd : A64I_STRs, (r & 31), base, ofs); | ||
395 | else | ||
396 | emit_lso(as, irt_is64(ir->t) ? A64I_STRx : A64I_STRw, r, base, ofs); | ||
397 | } | ||
398 | |||
399 | /* Emit an arithmetic operation with a constant operand. */ | ||
400 | static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src, | ||
401 | int32_t i, RegSet allow) | ||
402 | { | ||
403 | uint32_t k = emit_isk12(i); | ||
404 | if (k) | ||
405 | emit_dn(as, ai^k, dest, src); | ||
406 | else | ||
407 | emit_dnm(as, ai, dest, src, ra_allock(as, i, allow)); | ||
408 | } | ||
409 | |||
410 | /* Add offset to pointer. */ | ||
411 | static void emit_addptr(ASMState *as, Reg r, int32_t ofs) | ||
412 | { | ||
413 | if (ofs) | ||
414 | emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r, | ||
415 | ofs < 0 ? -ofs : ofs, rset_exclude(RSET_GPR, r)); | ||
416 | } | ||
417 | |||
/* Subtract ofs from RID_SP by adding the negated offset. */
#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs))
419 | |||