about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Mike Pall <mike> 2023-09-09 16:56:16 +0200
committer Mike Pall <mike> 2023-09-09 16:56:16 +0200
commit 315dc3e776d3199269a464b17d07c48064d3fd09 (patch)
tree 34334d4c4a4830f2d3b08ed26b3c4e0130ad778e /src
parent 5149b0a3a2809fef155ff2b2f01c667d920db3c2 (diff)
download luajit-315dc3e776d3199269a464b17d07c48064d3fd09.tar.gz
luajit-315dc3e776d3199269a464b17d07c48064d3fd09.tar.bz2
luajit-315dc3e776d3199269a464b17d07c48064d3fd09.zip
ARM64: Reload BASE via GL instead of spilling it.
Thanks to Peter Cawley. #1068.
Diffstat (limited to 'src')
-rw-r--r-- src/lj_asm_arm64.h 91
-rw-r--r-- src/lj_emit_arm64.h 2
2 files changed, 38 insertions, 55 deletions
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index a575269b..b8fbf69b 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -541,8 +541,6 @@ static void asm_retf(ASMState *as, IRIns *ir)
541 as->topslot -= (BCReg)delta; 541 as->topslot -= (BCReg)delta;
542 if ((int32_t)as->topslot < 0) as->topslot = 0; 542 if ((int32_t)as->topslot < 0) as->topslot = 0;
543 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ 543 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
544 /* Need to force a spill on REF_BASE now to update the stack slot. */
545 emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE)));
546 emit_setgl(as, base, jit_base); 544 emit_setgl(as, base, jit_base);
547 emit_addptr(as, base, -8*delta); 545 emit_addptr(as, base, -8*delta);
548 asm_guardcc(as, CC_NE); 546 asm_guardcc(as, CC_NE);
@@ -1794,37 +1792,28 @@ static void asm_prof(ASMState *as, IRIns *ir)
1794static void asm_stack_check(ASMState *as, BCReg topslot, 1792static void asm_stack_check(ASMState *as, BCReg topslot,
1795 IRIns *irp, RegSet allow, ExitNo exitno) 1793 IRIns *irp, RegSet allow, ExitNo exitno)
1796{ 1794{
1797 Reg pbase;
1798 uint32_t k; 1795 uint32_t k;
1796 Reg pbase = RID_BASE;
1799 if (irp) { 1797 if (irp) {
1800 if (!ra_hasspill(irp->s)) { 1798 pbase = irp->r;
1801 pbase = irp->r; 1799 if (!ra_hasreg(pbase))
1802 lj_assertA(ra_hasreg(pbase), "base reg lost"); 1800 pbase = allow ? (0x40 | rset_pickbot(allow)) : (0xC0 | RID_RET);
1803 } else if (allow) {
1804 pbase = rset_pickbot(allow);
1805 } else {
1806 pbase = RID_RET;
1807 emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */
1808 }
1809 } else {
1810 pbase = RID_BASE;
1811 } 1801 }
1812 emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno)); 1802 emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno));
1803 if (pbase & 0x80) /* Restore temp. register. */
1804 emit_lso(as, A64I_LDRx, (pbase & 31), RID_SP, 0);
1813 k = emit_isk12((8*topslot)); 1805 k = emit_isk12((8*topslot));
1814 lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); 1806 lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
1815 emit_n(as, A64I_CMPx^k, RID_TMP); 1807 emit_n(as, A64I_CMPx^k, RID_TMP);
1816 emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase); 1808 emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, (pbase & 31));
1817 emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP, 1809 emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP,
1818 (int32_t)offsetof(lua_State, maxstack)); 1810 (int32_t)offsetof(lua_State, maxstack));
1819 if (irp) { /* Must not spill arbitrary registers in head of side trace. */ 1811 if (pbase & 0x40) {
1820 if (ra_hasspill(irp->s)) 1812 emit_getgl(as, (pbase & 31), jit_base);
1821 emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s)); 1813 if (pbase & 0x80) /* Save temp register. */
1822 emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L)); 1814 emit_lso(as, A64I_STRx, (pbase & 31), RID_SP, 0);
1823 if (ra_hasspill(irp->s) && !allow)
1824 emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */
1825 } else {
1826 emit_getgl(as, RID_TMP, cur_L);
1827 } 1815 }
1816 emit_getgl(as, RID_TMP, cur_L);
1828} 1817}
1829 1818
1830/* Restore Lua stack from on-trace state. */ 1819/* Restore Lua stack from on-trace state. */
@@ -1921,46 +1910,40 @@ static void asm_loop_tail_fixup(ASMState *as)
1921 1910
1922/* -- Head of trace ------------------------------------------------------- */ 1911/* -- Head of trace ------------------------------------------------------- */
1923 1912
1924/* Reload L register from g->cur_L. */
1925static void asm_head_lreg(ASMState *as)
1926{
1927 IRIns *ir = IR(ASMREF_L);
1928 if (ra_used(ir)) {
1929 Reg r = ra_dest(as, ir, RSET_GPR);
1930 emit_getgl(as, r, cur_L);
1931 ra_evictk(as);
1932 }
1933}
1934
1935/* Coalesce BASE register for a root trace. */ 1913/* Coalesce BASE register for a root trace. */
1936static void asm_head_root_base(ASMState *as) 1914static void asm_head_root_base(ASMState *as)
1937{ 1915{
1938 IRIns *ir; 1916 IRIns *ir = IR(REF_BASE);
1939 asm_head_lreg(as); 1917 Reg r = ir->r;
1940 ir = IR(REF_BASE); 1918 if (ra_hasreg(r)) {
1941 if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) 1919 ra_free(as, r);
1942 ra_spill(as, ir); 1920 if (rset_test(as->modset, r) || irt_ismarked(ir->t))
1943 ra_destreg(as, ir, RID_BASE); 1921 ir->r = RID_INIT; /* No inheritance for modified BASE register. */
1922 if (r != RID_BASE)
1923 emit_movrr(as, ir, r, RID_BASE);
1924 }
1944} 1925}
1945 1926
1946/* Coalesce BASE register for a side trace. */ 1927/* Coalesce BASE register for a side trace. */
1947static Reg asm_head_side_base(ASMState *as, IRIns *irp) 1928static Reg asm_head_side_base(ASMState *as, IRIns *irp)
1948{ 1929{
1949 IRIns *ir; 1930 IRIns *ir = IR(REF_BASE);
1950 asm_head_lreg(as); 1931 Reg r = ir->r;
1951 ir = IR(REF_BASE); 1932 if (ra_hasreg(r)) {
1952 if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) 1933 ra_free(as, r);
1953 ra_spill(as, ir); 1934 if (rset_test(as->modset, r) || irt_ismarked(ir->t))
1954 if (ra_hasspill(irp->s)) { 1935 ir->r = RID_INIT; /* No inheritance for modified BASE register. */
1955 return ra_dest(as, ir, RSET_GPR); 1936 if (irp->r == r) {
1956 } else { 1937 return r; /* Same BASE register already coalesced. */
1957 Reg r = irp->r; 1938 } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
1958 lj_assertA(ra_hasreg(r), "base reg lost"); 1939 /* Move from coalesced parent reg. */
1959 if (r != ir->r && !rset_test(as->freeset, r)) 1940 emit_movrr(as, ir, r, irp->r);
1960 ra_restore(as, regcost_ref(as->cost[r])); 1941 return irp->r;
1961 ra_destreg(as, ir, r); 1942 } else {
1962 return r; 1943 emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
1944 }
1963 } 1945 }
1946 return RID_NONE;
1964} 1947}
1965 1948
1966/* -- Tail of trace ------------------------------------------------------- */ 1949/* -- Tail of trace ------------------------------------------------------- */
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
index 50e658dd..d4c54255 100644
--- a/src/lj_emit_arm64.h
+++ b/src/lj_emit_arm64.h
@@ -150,7 +150,7 @@ nopair:
150/* -- Emit loads/stores --------------------------------------------------- */ 150/* -- Emit loads/stores --------------------------------------------------- */
151 151
152/* Prefer rematerialization of BASE/L from global_State over spills. */ 152/* Prefer rematerialization of BASE/L from global_State over spills. */
153#define emit_canremat(ref) ((ref) <= ASMREF_L) 153#define emit_canremat(ref) ((ref) <= REF_BASE)
154 154
155/* Try to find a one-step delta relative to other consts. */ 155/* Try to find a one-step delta relative to other consts. */
156static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int is64) 156static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int is64)