aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mike Pall <mike> 2016-11-24 18:56:19 +0100
committer Mike Pall <mike> 2016-11-24 18:56:19 +0100
commit 81259898ea177bb7b4becebf3d7686603f6b373b (patch)
tree 99510dd250c28d91fcc4c236380e646b3cd108d3
parent 1131fa22a23c6284ba5945bffb3dcd6deef2076e (diff)
download luajit-81259898ea177bb7b4becebf3d7686603f6b373b.tar.gz
luajit-81259898ea177bb7b4becebf3d7686603f6b373b.tar.bz2
luajit-81259898ea177bb7b4becebf3d7686603f6b373b.zip
ARM64: Emit more efficient trace exits.
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
-rw-r--r-- src/lj_asm_arm64.h 71
-rw-r--r-- src/lj_target_arm64.h 14
-rw-r--r-- src/vm_arm64.dasc 31
3 files changed, 56 insertions, 60 deletions
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index 0a2f5306..19b3331d 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -47,53 +47,41 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
47 47
48/* -- Guard handling ------------------------------------------------------ */ 48/* -- Guard handling ------------------------------------------------------ */
49 49
50/* Generate an exit stub group at the bottom of the reserved MCode memory. */ 50/* Setup all needed exit stubs. */
51static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) 51static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
52{ 52{
53 MCode *mxp = as->mcbot; 53 ExitNo i;
54 int i; 54 MCode *mxp = as->mctop;
55 if (mxp + 3*4+4*EXITSTUBS_PER_GROUP >= as->mctop) 55 if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
56 asm_mclimit(as); 56 asm_mclimit(as);
57 /* str lr, [sp]; bl ->vm_exit_handler; .long group. */ 57 /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
58 *mxp++ = A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP); 58 for (i = nexits-1; (int32_t)i >= 0; i--)
59 *mxp = A64I_BL | (((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu); 59 *--mxp = A64I_BL|((-3-i)&0x03ffffffu);
60 mxp++; 60 *--mxp = A64I_MOVZw|A64F_U16(as->T->traceno);
61 *mxp++ = group*EXITSTUBS_PER_GROUP; 61 mxp--;
62 for (i = 0; i < EXITSTUBS_PER_GROUP; i++) 62 *mxp = A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu);
63 *mxp++ = A64I_B | ((-3-i)&0x03ffffffu); 63 *--mxp = A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP);
64 lj_mcode_sync(as->mcbot, mxp); 64 as->mctop = mxp;
65 lj_mcode_commitbot(as->J, mxp);
66 as->mcbot = mxp;
67 as->mclim = as->mcbot + MCLIM_REDZONE;
68 return mxp - EXITSTUBS_PER_GROUP;
69} 65}
70 66
71/* Setup all needed exit stubs. */ 67static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
72static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
73{ 68{
74 ExitNo i; 69 /* Keep this in-sync with exitstub_trace_addr(). */
75 if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) 70 return as->mctop + exitno + 3;
76 lj_trace_err(as->J, LJ_TRERR_SNAPOV);
77 for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++)
78 if (as->J->exitstubgroup[i] == NULL)
79 as->J->exitstubgroup[i] = asm_exitstub_gen(as, i);
80} 71}
81 72
82/* Emit conditional branch to exit for guard. */ 73/* Emit conditional branch to exit for guard. */
83static void asm_guardcc(ASMState *as, A64CC cc) 74static void asm_guardcc(ASMState *as, A64CC cc)
84{ 75{
85 MCode *target = exitstub_addr(as->J, as->snapno); 76 MCode *target = asm_exitstub_addr(as, as->snapno);
86 MCode *p = as->mcp; 77 MCode *p = as->mcp;
87 if (LJ_UNLIKELY(p == as->invmcp)) { 78 if (LJ_UNLIKELY(p == as->invmcp)) {
88 as->loopinv = 1; 79 as->loopinv = 1;
89 *p = A64I_BL | ((target-p) & 0x03ffffffu); 80 *p = A64I_B | ((target-p) & 0x03ffffffu);
90 emit_cond_branch(as, cc^1, p-1); 81 emit_cond_branch(as, cc^1, p-1);
91 return; 82 return;
92 } 83 }
93 /* No conditional calls. Emit b.cc/bl instead. */ 84 emit_cond_branch(as, cc, target);
94 /* That's a bad idea. NYI: emit per-trace exit stubs instead, see PPC. */
95 emit_branch(as, A64I_BL, target);
96 emit_cond_branch(as, cc^1, p);
97} 85}
98 86
99/* -- Operand fusion ------------------------------------------------------ */ 87/* -- Operand fusion ------------------------------------------------------ */
@@ -1568,8 +1556,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
1568 } else { 1556 } else {
1569 pbase = RID_BASE; 1557 pbase = RID_BASE;
1570 } 1558 }
1571 emit_branch(as, A64I_BL, exitstub_addr(as->J, exitno)); 1559 emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno));
1572 emit_cond_branch(as, CC_LS^1, as->mcp+1);
1573 k = emit_isk12((8*topslot)); 1560 k = emit_isk12((8*topslot));
1574 lua_assert(k); 1561 lua_assert(k);
1575 emit_n(as, A64I_CMPx^k, RID_TMP); 1562 emit_n(as, A64I_CMPx^k, RID_TMP);
@@ -1744,7 +1731,8 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
1744 /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ 1731 /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
1745 int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); 1732 int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED));
1746 if (spadj == 0) { 1733 if (spadj == 0) {
1747 as->mctop = --p; 1734 *--p = A64I_NOP;
1735 as->mctop = p;
1748 } else { 1736 } else {
1749 /* Patch stack adjustment. */ 1737 /* Patch stack adjustment. */
1750 uint32_t k = emit_isk12(spadj); 1738 uint32_t k = emit_isk12(spadj);
@@ -1805,13 +1793,18 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
1805 MCode *pe = (MCode *)((char *)p + T->szmcode); 1793 MCode *pe = (MCode *)((char *)p + T->szmcode);
1806 MCode *cstart = NULL, *cend = p; 1794 MCode *cstart = NULL, *cend = p;
1807 MCode *mcarea = lj_mcode_patch(J, p, 0); 1795 MCode *mcarea = lj_mcode_patch(J, p, 0);
1808 MCode *px = exitstub_addr(J, exitno); 1796 MCode *px = exitstub_trace_addr(T, exitno);
1809 for (; p < pe; p++) { 1797 for (; p < pe; p++) {
1810 /* Look for bl exitstub, replace with b target. */ 1798 /* Look for bcc/b exitstub, replace with bcc/b target. */
1811 uint32_t ins = *p; 1799 uint32_t ins = *p;
1812 if ((ins & 0xfc000000u) == 0x94000000u && 1800 if ((ins & 0xff000000u) == 0x54000000u &&
1813 ((ins ^ (px-p)) & 0x03ffffffu) == 0) { 1801 ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
1814 *p = (ins & 0x7c000000u) | ((target-p) & 0x03ffffffu); 1802 *p = (ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u);
1803 cend = p+1;
1804 if (!cstart) cstart = p;
1805 } else if ((ins & 0xfc000000u) == 0x14000000u &&
1806 ((ins ^ (px-p)) & 0x03ffffffu) == 0) {
1807 *p = (ins & 0xfc000000u) | ((target-p) & 0x03ffffffu);
1815 cend = p+1; 1808 cend = p+1;
1816 if (!cstart) cstart = p; 1809 if (!cstart) cstart = p;
1817 } 1810 }
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
index 0cef06d5..1cd02fe8 100644
--- a/src/lj_target_arm64.h
+++ b/src/lj_target_arm64.h
@@ -101,14 +101,18 @@ typedef struct {
101 int32_t spill[256]; /* Spill slots. */ 101 int32_t spill[256]; /* Spill slots. */
102} ExitState; 102} ExitState;
103 103
104/* PC after instruction that caused an exit. Used to find the trace number. */
105#define EXITSTATE_PCREG RID_LR
106/* Highest exit + 1 indicates stack check. */ 104/* Highest exit + 1 indicates stack check. */
107#define EXITSTATE_CHECKEXIT 1 105#define EXITSTATE_CHECKEXIT 1
108 106
109#define EXITSTUB_SPACING 4 107/* Return the address of a per-trace exit stub. */
110#define EXITSTUBS_PER_GROUP 32 108static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
111 109{
110 while (*p == 0xd503201f) p++; /* Skip A64I_NOP. */
111 return p + 3 + exitno;
112}
113/* Avoid dependence on lj_jit.h if only including lj_target.h. */
114#define exitstub_trace_addr(T, exitno) \
115 exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno))
112 116
113/* -- Instructions -------------------------------------------------------- */ 117/* -- Instructions -------------------------------------------------------- */
114 118
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index a6227bf7..86c78fa5 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -1927,22 +1927,21 @@ static void build_subroutines(BuildCtx *ctx)
1927 | stp d30, d31, [sp, #30*8] 1927 | stp d30, d31, [sp, #30*8]
1928 | ldr CARG1, [sp, #64*8] // Load original value of lr. 1928 | ldr CARG1, [sp, #64*8] // Load original value of lr.
1929 | add CARG3, sp, #64*8 // Recompute original value of sp. 1929 | add CARG3, sp, #64*8 // Recompute original value of sp.
1930 | mv_vmstate CARG4, EXIT 1930 | mv_vmstate CARG4, EXIT
1931 | ldr CARG2w, [CARG1, #-4]! // Get exit instruction. 1931 | stp xzr, CARG3, [sp, #62*8] // Store 0/sp in RID_LR/RID_SP.
1932 | stp CARG1, CARG3, [sp, #62*8] // Store exit pc/sp in RID_LR/RID_SP. 1932 | sub CARG1, CARG1, lr
1933 | lsl CARG2, CARG2, #38 1933 | ldr L, GL->cur_L
1934 | add CARG1, CARG1, CARG2, asr #36 1934 | lsr CARG1, CARG1, #2
1935 | ldr CARG2w, [lr] // Load exit stub group offset. 1935 | ldr BASE, GL->jit_base
1936 | sub CARG1, CARG1, lr 1936 | sub CARG1, CARG1, #2
1937 | sub CARG1, CARG1, #4 1937 | ldr CARG2w, [lr] // Load trace number.
1938 | ldr L, GL->cur_L 1938 | st_vmstate CARG4
1939 | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number. 1939 | str BASE, L->base
1940 | ldr BASE, GL->jit_base 1940 | ubfx CARG2w, CARG2w, #5, #16
1941 | st_vmstate CARG4 1941 | str CARG1w, [GL, #GL_J(exitno)]
1942 | str CARG1w, [GL, #GL_J(exitno)] 1942 | str CARG2w, [GL, #GL_J(parent)]
1943 | str BASE, L->base 1943 | str L, [GL, #GL_J(L)]
1944 | str L, [GL, #GL_J(L)] 1944 | str xzr, GL->jit_base
1945 | str xzr, GL->jit_base
1946 | add CARG1, GL, #GG_G2J 1945 | add CARG1, GL, #GG_G2J
1947 | mov CARG2, sp 1946 | mov CARG2, sp
1948 | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) 1947 | bl extern lj_trace_exit // (jit_State *J, ExitState *ex)