path: root/src/lj_emit_arm64.h
author     Mike Pall <mike>  2016-11-20 22:16:08 +0100
committer  Mike Pall <mike>  2016-11-20 22:18:14 +0100
commit     04b60707d7d117da22b40736a353e2a10179108a (patch)
tree       d11f50b00a8589108f5ebeeb005a12071fe6fcdf  /src/lj_emit_arm64.h
parent     13642b75ac37957d9e2a37b35ebec69d6d4b3bc1 (diff)
ARM64: Add JIT compiler backend.
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. Sponsored by Cisco Systems, Inc.
Diffstat (limited to 'src/lj_emit_arm64.h')
-rw-r--r--  src/lj_emit_arm64.h  |  397
1 file changed, 397 insertions(+), 0 deletions(-)
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
new file mode 100644
index 00000000..eb8f7fc7
--- /dev/null
+++ b/src/lj_emit_arm64.h
@@ -0,0 +1,397 @@
/*
** ARM64 instruction emitter.
** Copyright (C) 2005-2016 Mike Pall. See Copyright Notice in luajit.h
**
** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
** Sponsored by Cisco Systems, Inc.
*/

/* -- Constant encoding --------------------------------------------------- */

static uint64_t get_k64val(IRIns *ir)
{
  if (ir->o == IR_KINT64) {
    return ir_kint64(ir)->u64;
  } else if (ir->o == IR_KGC) {
    return (uint64_t)ir_kgc(ir);
  } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
    return (uint64_t)ir_kptr(ir);
  } else {
    lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL);
    return ir->i;  /* Sign-extended. */
  }
}

/* Encode constant in K12 format for data processing instructions. */
static uint32_t emit_isk12(int64_t n)
{
  uint64_t k = (n < 0) ? -n : n;
  uint32_t m = (n < 0) ? 0x40000000 : 0;
  if (k < 0x1000) {
    return A64I_K12|m|A64F_U12(k);
  } else if ((k & 0xfff000) == k) {
    return A64I_K12|m|0x400000|A64F_U12(k>>12);
  }
  return 0;
}
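
/* Illustrative sketch (not part of the original commit): the K12 form is a
** 12 bit unsigned immediate, optionally shifted left by 12, and the
** 0x40000000 bit selects the inverse opcode (ADD <-> SUB) for a negative
** input. A standalone version of the acceptance test:
*/
#ifdef LJ_EMIT_ARM64_EXAMPLES  /* Hypothetical guard, examples only. */
static int fits_k12(int64_t n)
{
  uint64_t k = (n < 0) ? -(uint64_t)n : (uint64_t)n;
  return k < 0x1000 || (k & 0xfff000ull) == k;  /* 12 bits, or 12 bits << 12. */
}
/* fits_k12(4095) == 1, fits_k12(0x45000) == 1, fits_k12(0x1001) == 0. */
#endif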

#define emit_clz64(n) __builtin_clzll(n)
#define emit_ctz64(n) __builtin_ctzll(n)

/* Encode constant in K13 format for logical data processing instructions. */
static uint32_t emit_isk13(uint64_t n, int is64)
{
  int inv = 0, w = 128, lz, tz;
  if (n & 1) { n = ~n; w = 64; inv = 1; }  /* Avoid wrap-around of ones. */
  if (!n) return 0;  /* Neither all-zero nor all-ones are allowed. */
  do {  /* Find the repeat width. */
    if (is64 && (uint32_t)(n^(n>>32))) break;
    n = (uint32_t)n; w = 32; if ((n^(n>>16)) & 0xffff) break;
    n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break;
    n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break;
    n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break;
    n = n & 0x3; w = 2;
  } while (0);
  lz = emit_clz64(n);
  tz = emit_ctz64(n);
  if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0;  /* Non-contiguous? */
  if (inv)
    return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10);
  else
    return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10);
}
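
/* Illustrative sketch (not part of the original commit): AArch64 logical
** immediates encode exactly those values that are a rotation of a
** contiguous run of ones, replicated across the register at a power-of-two
** element width. A standalone 64 bit version of the acceptance test:
*/
#ifdef LJ_EMIT_ARM64_EXAMPLES  /* Hypothetical guard, examples only. */
static int is_logical_imm64(uint64_t n)
{
  int lz, tz;
  if (n & 1) n = ~n;  /* Normalize so the run of ones cannot wrap around. */
  if (!n) return 0;  /* All-zeros and all-ones are not encodable. */
  do {  /* Fold down to the smallest repeating element. */
    if ((uint32_t)(n^(n>>32))) break;
    n = (uint32_t)n; if ((n^(n>>16)) & 0xffff) break;
    n = n & 0xffff; if ((n^(n>>8)) & 0xff) break;
    n = n & 0xff; if ((n^(n>>4)) & 0xf) break;
    n = n & 0xf; if ((n^(n>>2)) & 0x3) break;
    n = n & 0x3;
  } while (0);
  lz = emit_clz64(n);
  tz = emit_ctz64(n);
  /* Encodable iff the surviving element is one contiguous run of ones. */
  return ((int64_t)(n << lz) >> (lz+tz)) == -1ll;
}
/* is_logical_imm64(0x0101010101010101) == 1  (one set bit per byte)
** is_logical_imm64(0x00ffff0000ffff00) == 1  (rotated 16-one run per word)
** is_logical_imm64(0x0000000000012345) == 0  (not a rotated run of ones)
*/
#endif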

static uint32_t emit_isfpk64(uint64_t n)
{
  uint64_t etop9 = ((n >> 54) & 0x1ff);
  if ((n << 16) == 0 && (etop9 == 0x100 || etop9 == 0x0ff)) {
    return (uint32_t)(((n >> 48) & 0x7f) | ((n >> 56) & 0x80));
  }
  return ~0u;
}
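
/* Illustrative sketch (not part of the original commit): the FMOV immediate
** is 8 bits (sign, 3 exponent bits, 4 mantissa bits), covering
** +/-(1.0 .. 1.9375) * 2^e with e in -3..4. The test above accepts a double
** iff its low 48 mantissa bits are zero and its exponent is near the bias:
*/
#ifdef LJ_EMIT_ARM64_EXAMPLES  /* Hypothetical guard, examples only. */
#include <string.h>
static int is_fmov_imm(double d)
{
  uint64_t n;
  memcpy(&n, &d, sizeof(n));  /* Reinterpret the IEEE-754 bit pattern. */
  return emit_isfpk64(n) != ~0u;
}
/* is_fmov_imm(1.0) == 1, is_fmov_imm(-2.5) == 1, is_fmov_imm(0.1) == 0. */
#endif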

/* -- Emit basic instructions --------------------------------------------- */

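/* Note: like the other LuaJIT backends, this emitter works backwards:
** every helper below pre-decrements the machine-code pointer as->mcp,
** so instructions are generated in reverse of their execution order.
*/
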
static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm)
{
  *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm);
}

static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm)
{
  *--as->mcp = ai | A64F_D(rd) | A64F_M(rm);
}

static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn)
{
  *--as->mcp = ai | A64F_D(rd) | A64F_N(rn);
}

static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm)
{
  *--as->mcp = ai | A64F_N(rn) | A64F_M(rm);
}

static void emit_d(ASMState *as, A64Ins ai, Reg rd)
{
  *--as->mcp = ai | A64F_D(rd);
}

static void emit_n(ASMState *as, A64Ins ai, Reg rn)
{
  *--as->mcp = ai | A64F_N(rn);
}

static int emit_checkofs(A64Ins ai, int64_t ofs)
{
  int scale = (ai >> 30) & 3;
  if (ofs < 0 || (ofs & ((1<<scale)-1))) {
    return (ofs >= -256 && ofs <= 255) ? -1 : 0;
  } else {
    return (ofs < (4096<<scale)) ? 1 : 0;
  }
}
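
/* Illustration (not from the original commit): for a 64 bit load/store
** (scale 3) the classifier answers:
**   emit_checkofs(A64I_LDRx, 8)     ->  1  (scaled, unsigned 12 bit offset)
**   emit_checkofs(A64I_LDRx, -8)    -> -1  (unscaled, signed 9 bit offset)
**   emit_checkofs(A64I_LDRx, 32768) ->  0  (out of range, base reg needed)
*/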

static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs)
{
  int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3;
  lua_assert(ot);
  /* Combine LDR/STR pairs to LDP/STP. */
  if ((sc == 2 || sc == 3) &&
      (!(ai & 0x400000) || rd != rn) &&
      as->mcp != as->mcloop) {
    uint32_t prev = *as->mcp & ~A64F_D(31);
    int ofsm = ofs - (1<<sc), ofsp = ofs + (1<<sc);
    A64Ins aip;
    if (prev == (ai | A64F_N(rn) | A64F_U12(ofsm>>sc)) ||
        prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) {
      aip = (A64F_A(rd) | A64F_D(*as->mcp & 31));
    } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) ||
               prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) {
      aip = (A64F_D(rd) | A64F_A(*as->mcp & 31));
      ofsm = ofs;
    } else {
      goto nopair;
    }
    if (ofsm >= (-64<<sc) && ofsm <= (63<<sc)) {
      *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) |
        (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000));
      return;
    }
  }
nopair:
  if (ot == 1)
    *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_U12(ofs >> sc);
  else
    *--as->mcp = (ai^A64I_LS_U) | A64F_D(rd) | A64F_N(rn) | A64F_S9(ofs & 0x1ff);
}
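
/* Example of the pairing above (hypothetical registers): two 64 bit stores
** to [x1, #8] and [x1, #16] are combined into a single STP covering both
** slots. Fusion requires a shared base register, adjacent offsets within
** the signed 7 bit scaled LDP/STP immediate (-64..63 slots), and is skipped
** at the loop entry point (as->mcloop) and for a load that would pair with
** its own base register.
*/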

/* -- Emit loads/stores --------------------------------------------------- */

/* Prefer rematerialization of BASE/L from global_State over spills. */
#define emit_canremat(ref) ((ref) <= ASMREF_L)

/* Try to find an N-step delta relative to other consts with N < lim. */
static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
{
  RegSet work = ~as->freeset & RSET_GPR;
  if (lim <= 1) return 0;  /* Can't beat that. */
  while (work) {
    Reg r = rset_picktop(work);
    IRRef ref = regcost_ref(as->cost[r]);
    lua_assert(r != rd);
    if (ref < REF_TRUE) {
      uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) :
                                     get_k64val(IR(ref));
      int64_t delta = (int64_t)(k - kx);
      if (delta == 0) {
        emit_dm(as, A64I_MOVx, rd, r);
        return 1;
      } else {
        uint32_t k12 = emit_isk12(delta < 0 ? -delta : delta);
        if (k12) {
          emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r);
          return 1;
        }
        /* Do other ops or multi-step deltas pay off? Probably not.
        ** E.g. XOR rarely helps with pointer consts.
        */
      }
    }
    rset_clear(work, r);
  }
  return 0;  /* Failed. */
}
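
/* Hypothetical example: if some allocated register r is known to hold the
** constant 0x0000007f00000000 and 0x0000007f00000008 is requested, this
** finds delta = 8 and emits a single ADD rd, r, #8 instead of a
** multi-instruction MOVZ/MOVK sequence.
*/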

static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
{
  uint32_t k13 = emit_isk13(u64, is64);
  if (k13) {  /* Can the constant be represented as a bitmask immediate? */
    emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
  } else {
    int i, zeros = 0, ones = 0, neg;
    if (!is64) u64 = (int64_t)(int32_t)u64;  /* Sign-extend. */
    /* Count homogeneous 16 bit fragments. */
    for (i = 0; i < 4; i++) {
      uint64_t frag = (u64 >> i*16) & 0xffff;
      zeros += (frag == 0);
      ones += (frag == 0xffff);
    }
    neg = ones > zeros;  /* Use MOVN if it pays off. */
    if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
      int shift = 0, lshift = 0;
      uint64_t n64 = neg ? ~u64 : u64;
      if (n64 != 0) {
        /* Find first/last fragment to be filled. */
        shift = (63-emit_clz64(n64)) & ~15;
        lshift = emit_ctz64(n64) & ~15;
      }
      /* MOVK requires the original value (u64). */
      while (shift > lshift) {
        uint32_t u16 = (u64 >> shift) & 0xffff;
        /* Skip fragments that are correctly filled by MOVN/MOVZ. */
        if (u16 != (neg ? 0xffff : 0))
          emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
        shift -= 16;
      }
      /* But MOVN needs an inverted value (n64). */
      emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
             A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
    }
  }
}
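
/* Worked examples (not from the original commit): 0xffffffffffff1234 has
** three 0xffff fragments, so the MOVN path wins: n64 = ~u64 = 0xedcb and a
** single MOVN rd, #0xedcb suffices. 0x12340000 has three zero fragments:
** a single MOVZ rd, #0x1234, lsl #16. Mixed values get one MOVZ/MOVN for
** the lowest differing fragment plus one MOVK per higher fragment that
** differs from the fill pattern.
*/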

/* Load a 32 bit constant into a GPR. */
#define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0)

/* Load a 64 bit constant into a GPR. */
#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X)

#define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr))

#define glofs(as, k) \
  ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g))
#define mcpofs(as, k) \
  ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp))
#define checkmcpofs(as, k) \
  ((((mcpofs(as, k)>>2) + 0x00040000) >> 19) == 0)
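
/* checkmcpofs() tests whether k is reachable from the current code position
** with a PC-relative LDR (literal): its signed 19 bit word offset spans
** +/-1MB, and the macro folds that range check into a single comparison.
*/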

static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);

/* Get/set from constant pointer. */
static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p)
{
  /* First, check if ip + offset is in range. */
  if ((ai & 0x00400000) && checkmcpofs(as, p)) {
    emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r);
  } else {
    Reg base = RID_GL;  /* Next, try GL + offset. */
    int64_t ofs = glofs(as, p);
    if (!emit_checkofs(ai, ofs)) {  /* Else split up into base reg + offset. */
      int64_t i64 = i64ptr(p);
      base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r));
      ofs = i64 & 0x7fffull;
    }
    emit_lso(as, ai, r, base, ofs);
  }
}

/* Load 64 bit IR constant into register. */
static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
{
  const uint64_t *k = &ir_k64(ir)->u64;
  int64_t ofs;
  if (r >= RID_MAX_GPR) {
    uint32_t fpk = emit_isfpk64(*k);
    if (fpk != ~0u) {
      emit_d(as, A64I_FMOV_DI | A64F_FP8(fpk), (r & 31));
      return;
    }
  }
  ofs = glofs(as, k);
  if (emit_checkofs(A64I_LDRx, ofs)) {
    emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx,
             (r & 31), RID_GL, ofs);
  } else {
    if (r >= RID_MAX_GPR) {
      emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP);
      r = RID_TMP;
    }
    if (checkmcpofs(as, k))
      emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r);
    else
      emit_loadu64(as, r, *k);
  }
}

/* Get/set global_State fields. */
#define emit_getgl(as, r, field) \
  emit_lsptr(as, A64I_LDRx, (r), (void *)&J2G(as->J)->field)
#define emit_setgl(as, r, field) \
  emit_lsptr(as, A64I_STRx, (r), (void *)&J2G(as->J)->field)

/* Trace number is determined from pc of exit instruction. */
#define emit_setvmstate(as, i) UNUSED(i)

/* -- Emit control-flow instructions -------------------------------------- */

/* Label for internal jumps. */
typedef MCode *MCLabel;

/* Return label pointing to current PC. */
#define emit_label(as) ((as)->mcp)

static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target)
{
  MCode *p = as->mcp;
  ptrdiff_t delta = target - (p - 1);
  lua_assert(((delta + 0x40000) >> 19) == 0);
  *--p = A64I_BCC | A64F_S19((uint32_t)delta & 0x7ffff) | cond;
  as->mcp = p;
}

static void emit_branch(ASMState *as, A64Ins ai, MCode *target)
{
  MCode *p = as->mcp;
  ptrdiff_t delta = target - (p - 1);
  lua_assert(((delta + 0x02000000) >> 26) == 0);
  *--p = ai | ((uint32_t)delta & 0x03ffffffu);
  as->mcp = p;
}
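
/* The asserts above check the architectural branch ranges: B.cond uses a
** signed 19 bit word offset (+/-1MB), B and BL a signed 26 bit word offset
** (+/-128MB).
*/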

#define emit_jmp(as, target) emit_branch(as, A64I_B, (target))

static void emit_call(ASMState *as, void *target)
{
  MCode *p = --as->mcp;
  ptrdiff_t delta = (char *)target - (char *)p;
  if ((((delta>>2) + 0x02000000) >> 26) == 0) {
    *p = A64I_BL | ((uint32_t)(delta>>2) & 0x03ffffffu);
  } else {  /* Target out of range: need indirect call. But don't use R0-R7. */
    Reg r = ra_allock(as, i64ptr(target),
                      RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
    *p = A64I_BLR | A64F_N(r);
  }
}
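
/* For out-of-range targets the scratch register for BLR is deliberately
** allocated from x8 upwards: x0-x7 may already hold the outgoing call
** arguments at this point.
*/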

/* -- Emit generic operations --------------------------------------------- */

/* Generic move between two regs. */
static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
{
  if (dst >= RID_MAX_GPR) {
    emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D : A64I_FMOV_S,
            (dst & 31), (src & 31));
    return;
  }
  if (as->mcp != as->mcloop) {  /* Swap early registers for loads/stores. */
    MCode ins = *as->mcp, swp = (src^dst);
    if ((ins & 0xbf800000) == 0xb9000000) {
      if (!((ins ^ (dst << 5)) & 0x000003e0))
        *as->mcp = ins ^ (swp << 5);  /* Swap N in load/store. */
      if (!(ins & 0x00400000) && !((ins ^ dst) & 0x0000001f))
        *as->mcp = ins ^ swp;  /* Swap D in store. */
    }
  }
  emit_dm(as, A64I_MOVx, dst, src);
}

/* Generic load of register with base and (small) offset address. */
static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
  if (r >= RID_MAX_GPR)
    emit_lso(as, irt_isnum(ir->t) ? A64I_LDRd : A64I_LDRs, (r & 31), base, ofs);
  else
    emit_lso(as, irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw, r, base, ofs);
}

/* Generic store of register with base and (small) offset address. */
static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
  if (r >= RID_MAX_GPR)
    emit_lso(as, irt_isnum(ir->t) ? A64I_STRd : A64I_STRs, (r & 31), base, ofs);
  else
    emit_lso(as, irt_is64(ir->t) ? A64I_STRx : A64I_STRw, r, base, ofs);
}

/* Emit an arithmetic operation with a constant operand. */
static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src,
                     int32_t i, RegSet allow)
{
  uint32_t k = emit_isk12(i);
  if (k)
    emit_dn(as, ai^k, dest, src);
  else
    emit_dnm(as, ai, dest, src, ra_allock(as, i, allow));
}

/* Add offset to pointer. */
static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
{
  if (ofs)
    emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r,
             ofs < 0 ? -ofs : ofs, rset_exclude(RSET_GPR, r));
}

#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs))