diff options
author | Mike Pall <mike> | 2023-09-09 16:56:16 +0200 |
---|---|---|
committer | Mike Pall <mike> | 2023-09-09 16:56:16 +0200 |
commit | 315dc3e776d3199269a464b17d07c48064d3fd09 (patch) | |
tree | 34334d4c4a4830f2d3b08ed26b3c4e0130ad778e /src | |
parent | 5149b0a3a2809fef155ff2b2f01c667d920db3c2 (diff) | |
download | luajit-315dc3e776d3199269a464b17d07c48064d3fd09.tar.gz luajit-315dc3e776d3199269a464b17d07c48064d3fd09.tar.bz2 luajit-315dc3e776d3199269a464b17d07c48064d3fd09.zip |
ARM64: Reload BASE via GL instead of spilling it.
Thanks to Peter Cawley. #1068.
Diffstat (limited to 'src')
-rw-r--r-- | src/lj_asm_arm64.h | 91 | ||||
-rw-r--r-- | src/lj_emit_arm64.h | 2 |
2 files changed, 38 insertions, 55 deletions
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h<br>index a575269b..b8fbf69b 100644<br>--- a/src/lj_asm_arm64.h<br>+++ b/src/lj_asm_arm64.h | |||
@@ -541,8 +541,6 @@ static void asm_retf(ASMState *as, IRIns *ir) | |||
541 | as->topslot -= (BCReg)delta; | 541 | as->topslot -= (BCReg)delta; |
542 | if ((int32_t)as->topslot < 0) as->topslot = 0; | 542 | if ((int32_t)as->topslot < 0) as->topslot = 0; |
543 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ | 543 | irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ |
544 | /* Need to force a spill on REF_BASE now to update the stack slot. */ | ||
545 | emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE))); | ||
546 | emit_setgl(as, base, jit_base); | 544 | emit_setgl(as, base, jit_base); |
547 | emit_addptr(as, base, -8*delta); | 545 | emit_addptr(as, base, -8*delta); |
548 | asm_guardcc(as, CC_NE); | 546 | asm_guardcc(as, CC_NE); |
@@ -1794,37 +1792,28 @@ static void asm_prof(ASMState *as, IRIns *ir) | |||
1794 | static void asm_stack_check(ASMState *as, BCReg topslot, | 1792 | static void asm_stack_check(ASMState *as, BCReg topslot, |
1795 | IRIns *irp, RegSet allow, ExitNo exitno) | 1793 | IRIns *irp, RegSet allow, ExitNo exitno) |
1796 | { | 1794 | { |
1797 | Reg pbase; | ||
1798 | uint32_t k; | 1795 | uint32_t k; |
1796 | Reg pbase = RID_BASE; | ||
1799 | if (irp) { | 1797 | if (irp) { |
1800 | if (!ra_hasspill(irp->s)) { | 1798 | pbase = irp->r; |
1801 | pbase = irp->r; | 1799 | if (!ra_hasreg(pbase)) |
1802 | lj_assertA(ra_hasreg(pbase), "base reg lost"); | 1800 | pbase = allow ? (0x40 | rset_pickbot(allow)) : (0xC0 | RID_RET); |
1803 | } else if (allow) { | ||
1804 | pbase = rset_pickbot(allow); | ||
1805 | } else { | ||
1806 | pbase = RID_RET; | ||
1807 | emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */ | ||
1808 | } | ||
1809 | } else { | ||
1810 | pbase = RID_BASE; | ||
1811 | } | 1801 | } |
1812 | emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno)); | 1802 | emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno)); |
1803 | if (pbase & 0x80) /* Restore temp. register. */ | ||
1804 | emit_lso(as, A64I_LDRx, (pbase & 31), RID_SP, 0); | ||
1813 | k = emit_isk12((8*topslot)); | 1805 | k = emit_isk12((8*topslot)); |
1814 | lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); | 1806 | lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); |
1815 | emit_n(as, A64I_CMPx^k, RID_TMP); | 1807 | emit_n(as, A64I_CMPx^k, RID_TMP); |
1816 | emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase); | 1808 | emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, (pbase & 31)); |
1817 | emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP, | 1809 | emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP, |
1818 | (int32_t)offsetof(lua_State, maxstack)); | 1810 | (int32_t)offsetof(lua_State, maxstack)); |
1819 | if (irp) { /* Must not spill arbitrary registers in head of side trace. */ | 1811 | if (pbase & 0x40) { |
1820 | if (ra_hasspill(irp->s)) | 1812 | emit_getgl(as, (pbase & 31), jit_base); |
1821 | emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s)); | 1813 | if (pbase & 0x80) /* Save temp register. */ |
1822 | emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L)); | 1814 | emit_lso(as, A64I_STRx, (pbase & 31), RID_SP, 0); |
1823 | if (ra_hasspill(irp->s) && !allow) | ||
1824 | emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */ | ||
1825 | } else { | ||
1826 | emit_getgl(as, RID_TMP, cur_L); | ||
1827 | } | 1815 | } |
1816 | emit_getgl(as, RID_TMP, cur_L); | ||
1828 | } | 1817 | } |
1829 | 1818 | ||
1830 | /* Restore Lua stack from on-trace state. */ | 1819 | /* Restore Lua stack from on-trace state. */ |
@@ -1921,46 +1910,40 @@ static void asm_loop_tail_fixup(ASMState *as) | |||
1921 | 1910 | ||
1922 | /* -- Head of trace ------------------------------------------------------- */ | 1911 | /* -- Head of trace ------------------------------------------------------- */ |
1923 | 1912 | ||
1924 | /* Reload L register from g->cur_L. */ | ||
1925 | static void asm_head_lreg(ASMState *as) | ||
1926 | { | ||
1927 | IRIns *ir = IR(ASMREF_L); | ||
1928 | if (ra_used(ir)) { | ||
1929 | Reg r = ra_dest(as, ir, RSET_GPR); | ||
1930 | emit_getgl(as, r, cur_L); | ||
1931 | ra_evictk(as); | ||
1932 | } | ||
1933 | } | ||
1934 | |||
1935 | /* Coalesce BASE register for a root trace. */ | 1913 | /* Coalesce BASE register for a root trace. */ |
1936 | static void asm_head_root_base(ASMState *as) | 1914 | static void asm_head_root_base(ASMState *as) |
1937 | { | 1915 | { |
1938 | IRIns *ir; | 1916 | IRIns *ir = IR(REF_BASE); |
1939 | asm_head_lreg(as); | 1917 | Reg r = ir->r; |
1940 | ir = IR(REF_BASE); | 1918 | if (ra_hasreg(r)) { |
1941 | if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) | 1919 | ra_free(as, r); |
1942 | ra_spill(as, ir); | 1920 | if (rset_test(as->modset, r) || irt_ismarked(ir->t)) |
1943 | ra_destreg(as, ir, RID_BASE); | 1921 | ir->r = RID_INIT; /* No inheritance for modified BASE register. */ |
1922 | if (r != RID_BASE) | ||
1923 | emit_movrr(as, ir, r, RID_BASE); | ||
1924 | } | ||
1944 | } | 1925 | } |
1945 | 1926 | ||
1946 | /* Coalesce BASE register for a side trace. */ | 1927 | /* Coalesce BASE register for a side trace. */ |
1947 | static Reg asm_head_side_base(ASMState *as, IRIns *irp) | 1928 | static Reg asm_head_side_base(ASMState *as, IRIns *irp) |
1948 | { | 1929 | { |
1949 | IRIns *ir; | 1930 | IRIns *ir = IR(REF_BASE); |
1950 | asm_head_lreg(as); | 1931 | Reg r = ir->r; |
1951 | ir = IR(REF_BASE); | 1932 | if (ra_hasreg(r)) { |
1952 | if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) | 1933 | ra_free(as, r); |
1953 | ra_spill(as, ir); | 1934 | if (rset_test(as->modset, r) || irt_ismarked(ir->t)) |
1954 | if (ra_hasspill(irp->s)) { | 1935 | ir->r = RID_INIT; /* No inheritance for modified BASE register. */ |
1955 | return ra_dest(as, ir, RSET_GPR); | 1936 | if (irp->r == r) { |
1956 | } else { | 1937 | return r; /* Same BASE register already coalesced. */ |
1957 | Reg r = irp->r; | 1938 | } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { |
1958 | lj_assertA(ra_hasreg(r), "base reg lost"); | 1939 | /* Move from coalesced parent reg. */ |
1959 | if (r != ir->r && !rset_test(as->freeset, r)) | 1940 | emit_movrr(as, ir, r, irp->r); |
1960 | ra_restore(as, regcost_ref(as->cost[r])); | 1941 | return irp->r; |
1961 | ra_destreg(as, ir, r); | 1942 | } else { |
1962 | return r; | 1943 | emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ |
1944 | } | ||
1963 | } | 1945 | } |
1946 | return RID_NONE; | ||
1964 | } | 1947 | } |
1965 | 1948 | ||
1966 | /* -- Tail of trace ------------------------------------------------------- */ | 1949 | /* -- Tail of trace ------------------------------------------------------- */ |
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h<br>index 50e658dd..d4c54255 100644<br>--- a/src/lj_emit_arm64.h<br>+++ b/src/lj_emit_arm64.h | |||
@@ -150,7 +150,7 @@ nopair: | |||
150 | /* -- Emit loads/stores --------------------------------------------------- */ | 150 | /* -- Emit loads/stores --------------------------------------------------- */ |
151 | 151 | ||
152 | /* Prefer rematerialization of BASE/L from global_State over spills. */ | 152 | /* Prefer rematerialization of BASE/L from global_State over spills. */ |
153 | #define emit_canremat(ref) ((ref) <= ASMREF_L) | 153 | #define emit_canremat(ref) ((ref) <= REF_BASE) |
154 | 154 | ||
155 | /* Try to find a one-step delta relative to other consts. */ | 155 | /* Try to find a one-step delta relative to other consts. */ |
156 | static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int is64) | 156 | static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int is64) |