diff options
author | Mike Pall <mike> | 2016-11-24 18:56:19 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2016-11-24 18:56:19 +0100 |
commit | 81259898ea177bb7b4becebf3d7686603f6b373b (patch) | |
tree | 99510dd250c28d91fcc4c236380e646b3cd108d3 | |
parent | 1131fa22a23c6284ba5945bffb3dcd6deef2076e (diff) | |
download | luajit-81259898ea177bb7b4becebf3d7686603f6b373b.tar.gz luajit-81259898ea177bb7b4becebf3d7686603f6b373b.tar.bz2 luajit-81259898ea177bb7b4becebf3d7686603f6b373b.zip |
ARM64: Emit more efficient trace exits.
Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
-rw-r--r-- | src/lj_asm_arm64.h | 71 | ||||
-rw-r--r-- | src/lj_target_arm64.h | 14 | ||||
-rw-r--r-- | src/vm_arm64.dasc | 31 |
3 files changed, 56 insertions, 60 deletions
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 0a2f5306..19b3331d 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h | |||
@@ -47,53 +47,41 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) | |||
47 | 47 | ||
48 | /* -- Guard handling ------------------------------------------------------ */ | 48 | /* -- Guard handling ------------------------------------------------------ */ |
49 | 49 | ||
50 | /* Generate an exit stub group at the bottom of the reserved MCode memory. */ | 50 | /* Setup all needed exit stubs. */ |
51 | static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) | 51 | static void asm_exitstub_setup(ASMState *as, ExitNo nexits) |
52 | { | 52 | { |
53 | MCode *mxp = as->mcbot; | 53 | ExitNo i; |
54 | int i; | 54 | MCode *mxp = as->mctop; |
55 | if (mxp + 3*4+4*EXITSTUBS_PER_GROUP >= as->mctop) | 55 | if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) |
56 | asm_mclimit(as); | 56 | asm_mclimit(as); |
57 | /* str lr, [sp]; bl ->vm_exit_handler; .long group. */ | 57 | /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */ |
58 | *mxp++ = A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP); | 58 | for (i = nexits-1; (int32_t)i >= 0; i--) |
59 | *mxp = A64I_BL | (((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu); | 59 | *--mxp = A64I_BL|((-3-i)&0x03ffffffu); |
60 | mxp++; | 60 | *--mxp = A64I_MOVZw|A64F_U16(as->T->traceno); |
61 | *mxp++ = group*EXITSTUBS_PER_GROUP; | 61 | mxp--; |
62 | for (i = 0; i < EXITSTUBS_PER_GROUP; i++) | 62 | *mxp = A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu); |
63 | *mxp++ = A64I_B | ((-3-i)&0x03ffffffu); | 63 | *--mxp = A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP); |
64 | lj_mcode_sync(as->mcbot, mxp); | 64 | as->mctop = mxp; |
65 | lj_mcode_commitbot(as->J, mxp); | ||
66 | as->mcbot = mxp; | ||
67 | as->mclim = as->mcbot + MCLIM_REDZONE; | ||
68 | return mxp - EXITSTUBS_PER_GROUP; | ||
69 | } | 65 | } |
70 | 66 | ||
71 | /* Setup all needed exit stubs. */ | 67 | static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) |
72 | static void asm_exitstub_setup(ASMState *as, ExitNo nexits) | ||
73 | { | 68 | { |
74 | ExitNo i; | 69 | /* Keep this in-sync with exitstub_trace_addr(). */ |
75 | if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) | 70 | return as->mctop + exitno + 3; |
76 | lj_trace_err(as->J, LJ_TRERR_SNAPOV); | ||
77 | for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++) | ||
78 | if (as->J->exitstubgroup[i] == NULL) | ||
79 | as->J->exitstubgroup[i] = asm_exitstub_gen(as, i); | ||
80 | } | 71 | } |
81 | 72 | ||
82 | /* Emit conditional branch to exit for guard. */ | 73 | /* Emit conditional branch to exit for guard. */ |
83 | static void asm_guardcc(ASMState *as, A64CC cc) | 74 | static void asm_guardcc(ASMState *as, A64CC cc) |
84 | { | 75 | { |
85 | MCode *target = exitstub_addr(as->J, as->snapno); | 76 | MCode *target = asm_exitstub_addr(as, as->snapno); |
86 | MCode *p = as->mcp; | 77 | MCode *p = as->mcp; |
87 | if (LJ_UNLIKELY(p == as->invmcp)) { | 78 | if (LJ_UNLIKELY(p == as->invmcp)) { |
88 | as->loopinv = 1; | 79 | as->loopinv = 1; |
89 | *p = A64I_BL | ((target-p) & 0x03ffffffu); | 80 | *p = A64I_B | ((target-p) & 0x03ffffffu); |
90 | emit_cond_branch(as, cc^1, p-1); | 81 | emit_cond_branch(as, cc^1, p-1); |
91 | return; | 82 | return; |
92 | } | 83 | } |
93 | /* No conditional calls. Emit b.cc/bl instead. */ | 84 | emit_cond_branch(as, cc, target); |
94 | /* That's a bad idea. NYI: emit per-trace exit stubs instead, see PPC. */ | ||
95 | emit_branch(as, A64I_BL, target); | ||
96 | emit_cond_branch(as, cc^1, p); | ||
97 | } | 85 | } |
98 | 86 | ||
99 | /* -- Operand fusion ------------------------------------------------------ */ | 87 | /* -- Operand fusion ------------------------------------------------------ */ |
@@ -1568,8 +1556,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, | |||
1568 | } else { | 1556 | } else { |
1569 | pbase = RID_BASE; | 1557 | pbase = RID_BASE; |
1570 | } | 1558 | } |
1571 | emit_branch(as, A64I_BL, exitstub_addr(as->J, exitno)); | 1559 | emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno)); |
1572 | emit_cond_branch(as, CC_LS^1, as->mcp+1); | ||
1573 | k = emit_isk12((8*topslot)); | 1560 | k = emit_isk12((8*topslot)); |
1574 | lua_assert(k); | 1561 | lua_assert(k); |
1575 | emit_n(as, A64I_CMPx^k, RID_TMP); | 1562 | emit_n(as, A64I_CMPx^k, RID_TMP); |
@@ -1744,7 +1731,8 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) | |||
1744 | /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ | 1731 | /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ |
1745 | int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); | 1732 | int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); |
1746 | if (spadj == 0) { | 1733 | if (spadj == 0) { |
1747 | as->mctop = --p; | 1734 | *--p = A64I_NOP; |
1735 | as->mctop = p; | ||
1748 | } else { | 1736 | } else { |
1749 | /* Patch stack adjustment. */ | 1737 | /* Patch stack adjustment. */ |
1750 | uint32_t k = emit_isk12(spadj); | 1738 | uint32_t k = emit_isk12(spadj); |
@@ -1805,13 +1793,18 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) | |||
1805 | MCode *pe = (MCode *)((char *)p + T->szmcode); | 1793 | MCode *pe = (MCode *)((char *)p + T->szmcode); |
1806 | MCode *cstart = NULL, *cend = p; | 1794 | MCode *cstart = NULL, *cend = p; |
1807 | MCode *mcarea = lj_mcode_patch(J, p, 0); | 1795 | MCode *mcarea = lj_mcode_patch(J, p, 0); |
1808 | MCode *px = exitstub_addr(J, exitno); | 1796 | MCode *px = exitstub_trace_addr(T, exitno); |
1809 | for (; p < pe; p++) { | 1797 | for (; p < pe; p++) { |
1810 | /* Look for bl exitstub, replace with b target. */ | 1798 | /* Look for bcc/b exitstub, replace with bcc/b target. */ |
1811 | uint32_t ins = *p; | 1799 | uint32_t ins = *p; |
1812 | if ((ins & 0xfc000000u) == 0x94000000u && | 1800 | if ((ins & 0xff000000u) == 0x54000000u && |
1813 | ((ins ^ (px-p)) & 0x03ffffffu) == 0) { | 1801 | ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { |
1814 | *p = (ins & 0x7c000000u) | ((target-p) & 0x03ffffffu); | 1802 | *p = (ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u); |
1803 | cend = p+1; | ||
1804 | if (!cstart) cstart = p; | ||
1805 | } else if ((ins & 0xfc000000u) == 0x14000000u && | ||
1806 | ((ins ^ (px-p)) & 0x03ffffffu) == 0) { | ||
1807 | *p = (ins & 0xfc000000u) | ((target-p) & 0x03ffffffu); | ||
1815 | cend = p+1; | 1808 | cend = p+1; |
1816 | if (!cstart) cstart = p; | 1809 | if (!cstart) cstart = p; |
1817 | } | 1810 | } |
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h index 0cef06d5..1cd02fe8 100644 --- a/src/lj_target_arm64.h +++ b/src/lj_target_arm64.h | |||
@@ -101,14 +101,18 @@ typedef struct { | |||
101 | int32_t spill[256]; /* Spill slots. */ | 101 | int32_t spill[256]; /* Spill slots. */ |
102 | } ExitState; | 102 | } ExitState; |
103 | 103 | ||
104 | /* PC after instruction that caused an exit. Used to find the trace number. */ | ||
105 | #define EXITSTATE_PCREG RID_LR | ||
106 | /* Highest exit + 1 indicates stack check. */ | 104 | /* Highest exit + 1 indicates stack check. */ |
107 | #define EXITSTATE_CHECKEXIT 1 | 105 | #define EXITSTATE_CHECKEXIT 1 |
108 | 106 | ||
109 | #define EXITSTUB_SPACING 4 | 107 | /* Return the address of a per-trace exit stub. */ |
110 | #define EXITSTUBS_PER_GROUP 32 | 108 | static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) |
111 | 109 | { | |
110 | while (*p == 0xd503201f) p++; /* Skip A64I_NOP. */ | ||
111 | return p + 3 + exitno; | ||
112 | } | ||
113 | /* Avoid dependence on lj_jit.h if only including lj_target.h. */ | ||
114 | #define exitstub_trace_addr(T, exitno) \ | ||
115 | exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno)) | ||
112 | 116 | ||
113 | /* -- Instructions -------------------------------------------------------- */ | 117 | /* -- Instructions -------------------------------------------------------- */ |
114 | 118 | ||
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index a6227bf7..86c78fa5 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc | |||
@@ -1927,22 +1927,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
1927 | | stp d30, d31, [sp, #30*8] | 1927 | | stp d30, d31, [sp, #30*8] |
1928 | | ldr CARG1, [sp, #64*8] // Load original value of lr. | 1928 | | ldr CARG1, [sp, #64*8] // Load original value of lr. |
1929 | | add CARG3, sp, #64*8 // Recompute original value of sp. | 1929 | | add CARG3, sp, #64*8 // Recompute original value of sp. |
1930 | | mv_vmstate CARG4, EXIT | 1930 | | mv_vmstate CARG4, EXIT |
1931 | | ldr CARG2w, [CARG1, #-4]! // Get exit instruction. | 1931 | | stp xzr, CARG3, [sp, #62*8] // Store 0/sp in RID_LR/RID_SP. |
1932 | | stp CARG1, CARG3, [sp, #62*8] // Store exit pc/sp in RID_LR/RID_SP. | 1932 | | sub CARG1, CARG1, lr |
1933 | | lsl CARG2, CARG2, #38 | 1933 | | ldr L, GL->cur_L |
1934 | | add CARG1, CARG1, CARG2, asr #36 | 1934 | | lsr CARG1, CARG1, #2 |
1935 | | ldr CARG2w, [lr] // Load exit stub group offset. | 1935 | | ldr BASE, GL->jit_base |
1936 | | sub CARG1, CARG1, lr | 1936 | | sub CARG1, CARG1, #2 |
1937 | | sub CARG1, CARG1, #4 | 1937 | | ldr CARG2w, [lr] // Load trace number. |
1938 | | ldr L, GL->cur_L | 1938 | | st_vmstate CARG4 |
1939 | | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number. | 1939 | | str BASE, L->base |
1940 | | ldr BASE, GL->jit_base | 1940 | | ubfx CARG2w, CARG2w, #5, #16 |
1941 | | st_vmstate CARG4 | 1941 | | str CARG1w, [GL, #GL_J(exitno)] |
1942 | | str CARG1w, [GL, #GL_J(exitno)] | 1942 | | str CARG2w, [GL, #GL_J(parent)] |
1943 | | str BASE, L->base | 1943 | | str L, [GL, #GL_J(L)] |
1944 | | str L, [GL, #GL_J(L)] | 1944 | | str xzr, GL->jit_base |
1945 | | str xzr, GL->jit_base | ||
1946 | | add CARG1, GL, #GG_G2J | 1945 | | add CARG1, GL, #GG_G2J |
1947 | | mov CARG2, sp | 1946 | | mov CARG2, sp |
1948 | | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) | 1947 | | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) |