aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMike Pall <mike>2012-07-02 23:47:12 +0200
committerMike Pall <mike>2012-07-02 23:47:12 +0200
commit0af3f47ba01b9634f75a1aee38e318d74dab53d0 (patch)
tree1ab064739041d279a54f89f396ae03bc9cf89864 /src
parent79e1eaa73b1bb8dd0e2ea7aeaba8504f89e5ff94 (diff)
downloadluajit-0af3f47ba01b9634f75a1aee38e318d74dab53d0.tar.gz
luajit-0af3f47ba01b9634f75a1aee38e318d74dab53d0.tar.bz2
luajit-0af3f47ba01b9634f75a1aee38e318d74dab53d0.zip
Add allocation sinking and store sinking optimization.
Diffstat (limited to 'src')
-rw-r--r--src/Makefile2
-rw-r--r--src/Makefile.dep20
-rw-r--r--src/jit/dump.lua15
-rw-r--r--src/lj_asm.c67
-rw-r--r--src/lj_asm_arm.h49
-rw-r--r--src/lj_asm_mips.h43
-rw-r--r--src/lj_asm_ppc.h34
-rw-r--r--src/lj_asm_x86.h15
-rw-r--r--src/lj_iropt.h1
-rw-r--r--src/lj_jit.h9
-rw-r--r--src/lj_opt_sink.c244
-rw-r--r--src/lj_snap.c330
-rw-r--r--src/lj_target.h6
-rw-r--r--src/lj_trace.c1
-rw-r--r--src/ljamalg.c1
15 files changed, 749 insertions, 88 deletions
diff --git a/src/Makefile b/src/Makefile
index 6e0c7463..13344a77 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -443,7 +443,7 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \
443 lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_api.o \ 443 lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_api.o \
444 lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o \ 444 lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o \
445 lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ 445 lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
446 lj_opt_dce.o lj_opt_loop.o lj_opt_split.o \ 446 lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
447 lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ 447 lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
448 lj_asm.o lj_trace.o lj_gdbjit.o \ 448 lj_asm.o lj_trace.o lj_gdbjit.o \
449 lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \ 449 lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \
diff --git a/src/Makefile.dep b/src/Makefile.dep
index ff4492fb..1c7e5dc0 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -142,6 +142,8 @@ lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
142lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ 142lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
143 lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ 143 lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
144 lj_dispatch.h lj_traceerr.h lj_vm.h 144 lj_dispatch.h lj_traceerr.h lj_vm.h
145lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
146 lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
145lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ 147lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
146 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \ 148 lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \
147 lj_iropt.h lj_vm.h 149 lj_iropt.h lj_vm.h
@@ -153,8 +155,9 @@ lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
153 lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ 155 lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
154 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h 156 lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h
155lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ 157lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
156 lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ 158 lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
157 lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h lj_target_*.h 159 lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
160 lj_target_*.h lj_ctype.h lj_cdata.h
158lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 161lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
159 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \ 162 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \
160 lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \ 163 lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \
@@ -188,12 +191,13 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
188 lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \ 191 lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
189 lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h \ 192 lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h \
190 lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \ 193 lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
191 lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_mcode.c lj_snap.c \ 194 lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \
192 lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c lj_crecord.h \ 195 lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
193 lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h lj_asm_*.h \ 196 lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
194 lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c \ 197 lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
195 lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c \ 198 lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
196 lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c lib_init.c 199 lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \
200 lib_init.c
197luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h 201luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
198host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ 202host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
199 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ 203 lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \
diff --git a/src/jit/dump.lua b/src/jit/dump.lua
index 3d62c4ea..98933971 100644
--- a/src/jit/dump.lua
+++ b/src/jit/dump.lua
@@ -374,10 +374,13 @@ local function dump_snap(tr)
374end 374end
375 375
376-- Return a register name or stack slot for a rid/sp location. 376-- Return a register name or stack slot for a rid/sp location.
377local function ridsp_name(ridsp) 377local function ridsp_name(ridsp, ins)
378 if not disass then disass = require("jit.dis_"..jit.arch) end 378 if not disass then disass = require("jit.dis_"..jit.arch) end
379 local rid = band(ridsp, 0xff) 379 local rid, slot = band(ridsp, 0xff), shr(ridsp, 8)
380 if ridsp > 255 then return format("[%x]", shr(ridsp, 8)*4) end 380 if rid == 253 or rid == 254 then
381 return slot == 0 and " {sink" or format(" {%04d", ins-slot)
382 end
383 if ridsp > 255 then return format("[%x]", slot*4) end
381 if rid < 128 then return disass.regname(rid) end 384 if rid < 128 then return disass.regname(rid) end
382 return "" 385 return ""
383end 386end
@@ -458,13 +461,15 @@ local function dump_ir(tr, dumpsnap, dumpreg)
458 end 461 end
459 elseif op ~= "NOP " and op ~= "CARG " and 462 elseif op ~= "NOP " and op ~= "CARG " and
460 (dumpreg or op ~= "RENAME") then 463 (dumpreg or op ~= "RENAME") then
464 local rid = band(ridsp, 255)
461 if dumpreg then 465 if dumpreg then
462 out:write(format("%04d %-5s ", ins, ridsp_name(ridsp))) 466 out:write(format("%04d %-6s", ins, ridsp_name(ridsp, ins)))
463 else 467 else
464 out:write(format("%04d ", ins)) 468 out:write(format("%04d ", ins))
465 end 469 end
466 out:write(format("%s%s %s %s ", 470 out:write(format("%s%s %s %s ",
467 band(ot, 128) == 0 and " " or ">", 471 (rid == 254 or rid == 253) and "}" or
472 (band(ot, 128) == 0 and " " or ">"),
468 band(ot, 64) == 0 and " " or "+", 473 band(ot, 64) == 0 and " " or "+",
469 irtype[t], op)) 474 irtype[t], op))
470 local m1, m2 = band(m, 3), band(m, 3*4) 475 local m1, m2 = band(m, 3), band(m, 3*4)
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 9bce9292..8ff3eaf7 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -782,19 +782,44 @@ static int asm_snap_canremat(ASMState *as)
782static void asm_snap_alloc1(ASMState *as, IRRef ref) 782static void asm_snap_alloc1(ASMState *as, IRRef ref)
783{ 783{
784 IRIns *ir = IR(ref); 784 IRIns *ir = IR(ref);
785 if (!ra_used(ir)) { 785 if (!(ra_used(ir) || ir->r == RID_SUNK)) {
786 RegSet allow = (!LJ_SOFTFP && irt_isnum(ir->t)) ? RSET_FPR : RSET_GPR; 786 if (ir->r == RID_SINK) {
787 /* Get a weak register if we have a free one or can rematerialize. */ 787 ir->r = RID_SUNK;
788 if ((as->freeset & allow) || 788#if LJ_HASFFI
789 (allow == RSET_FPR && asm_snap_canremat(as))) { 789 if (ir->o == IR_CNEWI) { /* Allocate CNEWI value. */
790 Reg r = ra_allocref(as, ref, allow); /* Allocate a register. */ 790 asm_snap_alloc1(as, ir->op2);
791 if (!irt_isphi(ir->t)) 791 if (LJ_32 && (ir+1)->o == IR_HIOP)
792 ra_weak(as, r); /* But mark it as weakly referenced. */ 792 asm_snap_alloc1(as, (ir+1)->op2);
793 checkmclim(as); 793 }
794 RA_DBGX((as, "snapreg $f $r", ref, ir->r)); 794#endif
795 else { /* Allocate stored values for TNEW, TDUP and CNEW. */
796 IRIns *irs;
797 lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW);
798 for (irs = IR(as->curins); irs > ir; irs--)
799 if (irs->r == RID_SINK && ir + irs->s == irs) {
800 lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
801 irs->o == IR_FSTORE || irs->o == IR_XSTORE);
802 asm_snap_alloc1(as, irs->op2);
803 if (LJ_32 && (irs+1)->o == IR_HIOP)
804 asm_snap_alloc1(as, (irs+1)->op2);
805 }
806 }
807 } else if (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT) {
808 asm_snap_alloc1(as, ir->op1);
795 } else { 809 } else {
796 ra_spill(as, ir); /* Otherwise force a spill slot. */ 810 RegSet allow = (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR;
797 RA_DBGX((as, "snapspill $f $s", ref, ir->s)); 811 if ((as->freeset & allow) ||
812 (allow == RSET_FPR && asm_snap_canremat(as))) {
813 /* Get a weak register if we have a free one or can rematerialize. */
814 Reg r = ra_allocref(as, ref, allow); /* Allocate a register. */
815 if (!irt_isphi(ir->t))
816 ra_weak(as, r); /* But mark it as weakly referenced. */
817 checkmclim(as);
818 RA_DBGX((as, "snapreg $f $r", ref, ir->r));
819 } else {
820 ra_spill(as, ir); /* Otherwise force a spill slot. */
821 RA_DBGX((as, "snapspill $f $s", ref, ir->s));
822 }
798 } 823 }
799 } 824 }
800} 825}
@@ -848,7 +873,7 @@ static void asm_snap_prep(ASMState *as)
848{ 873{
849 if (as->curins < as->snapref) { 874 if (as->curins < as->snapref) {
850 do { 875 do {
851 lua_assert(as->snapno != 0); 876 if (as->snapno == 0) return; /* Called by sunk stores before snap #0. */
852 as->snapno--; 877 as->snapno--;
853 as->snapref = as->T->snap[as->snapno].ref; 878 as->snapref = as->T->snap[as->snapno].ref;
854 } while (as->curins < as->snapref); 879 } while (as->curins < as->snapref);
@@ -1180,6 +1205,8 @@ static void asm_phi(ASMState *as, IRIns *ir)
1180 RegSet afree = (as->freeset & allow); 1205 RegSet afree = (as->freeset & allow);
1181 IRIns *irl = IR(ir->op1); 1206 IRIns *irl = IR(ir->op1);
1182 IRIns *irr = IR(ir->op2); 1207 IRIns *irr = IR(ir->op2);
1208 if (ir->r == RID_SINK) /* Sink PHI. */
1209 return;
1183 /* Spill slot shuffling is not implemented yet (but rarely needed). */ 1210 /* Spill slot shuffling is not implemented yet (but rarely needed). */
1184 if (ra_hasspill(irl->s) || ra_hasspill(irr->s)) 1211 if (ra_hasspill(irl->s) || ra_hasspill(irr->s))
1185 lj_trace_err(as->J, LJ_TRERR_NYIPHI); 1212 lj_trace_err(as->J, LJ_TRERR_NYIPHI);
@@ -1494,7 +1521,7 @@ static void asm_tail_link(ASMState *as)
1494/* -- Trace setup --------------------------------------------------------- */ 1521/* -- Trace setup --------------------------------------------------------- */
1495 1522
1496/* Clear reg/sp for all instructions and add register hints. */ 1523/* Clear reg/sp for all instructions and add register hints. */
1497static void asm_setup_regsp(ASMState *as) 1524static void asm_setup_regsp(ASMState *as, int sink)
1498{ 1525{
1499 GCtrace *T = as->T; 1526 GCtrace *T = as->T;
1500 IRRef nins = T->nins; 1527 IRRef nins = T->nins;
@@ -1545,6 +1572,14 @@ static void asm_setup_regsp(ASMState *as)
1545 inloop = 0; 1572 inloop = 0;
1546 as->evenspill = SPS_FIRST; 1573 as->evenspill = SPS_FIRST;
1547 for (lastir = IR(nins); ir < lastir; ir++) { 1574 for (lastir = IR(nins); ir < lastir; ir++) {
1575 if (sink) {
1576 if (ir->r == RID_SINK)
1577 continue;
1578 if (ir->r == RID_SUNK) { /* Revert after ASM restart. */
1579 ir->r = RID_SINK;
1580 continue;
1581 }
1582 }
1548 switch (ir->o) { 1583 switch (ir->o) {
1549 case IR_LOOP: 1584 case IR_LOOP:
1550 inloop = 1; 1585 inloop = 1;
@@ -1716,6 +1751,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1716 ASMState as_; 1751 ASMState as_;
1717 ASMState *as = &as_; 1752 ASMState *as = &as_;
1718 MCode *origtop; 1753 MCode *origtop;
1754 int sink;
1719 1755
1720 /* Ensure an initialized instruction beyond the last one for HIOP checks. */ 1756 /* Ensure an initialized instruction beyond the last one for HIOP checks. */
1721 J->cur.nins = lj_ir_nextins(J); 1757 J->cur.nins = lj_ir_nextins(J);
@@ -1736,6 +1772,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1736 as->mcp = as->mctop; 1772 as->mcp = as->mctop;
1737 as->mclim = as->mcbot + MCLIM_REDZONE; 1773 as->mclim = as->mcbot + MCLIM_REDZONE;
1738 asm_setup_target(as); 1774 asm_setup_target(as);
1775 sink = (IR(REF_BASE)->prev == 1);
1739 1776
1740 do { 1777 do {
1741 as->mcp = as->mctop; 1778 as->mcp = as->mctop;
@@ -1751,7 +1788,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
1751 as->gcsteps = 0; 1788 as->gcsteps = 0;
1752 as->sectref = as->loopref; 1789 as->sectref = as->loopref;
1753 as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED; 1790 as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED;
1754 asm_setup_regsp(as); 1791 asm_setup_regsp(as, sink);
1755 if (!as->loopref) 1792 if (!as->loopref)
1756 asm_tail_link(as); 1793 asm_tail_link(as);
1757 1794
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index e6ab3573..19250254 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -693,6 +693,8 @@ static void asm_newref(ASMState *as, IRIns *ir)
693{ 693{
694 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; 694 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
695 IRRef args[3]; 695 IRRef args[3];
696 if (ir->r == RID_SINK) /* Sink newref. */
697 return;
696 args[0] = ASMREF_L; /* lua_State *L */ 698 args[0] = ASMREF_L; /* lua_State *L */
697 args[1] = ir->op1; /* GCtab *t */ 699 args[1] = ir->op1; /* GCtab *t */
698 args[2] = ASMREF_TMP1; /* cTValue *key */ 700 args[2] = ASMREF_TMP1; /* cTValue *key */
@@ -836,9 +838,13 @@ static void asm_xload(ASMState *as, IRIns *ir)
836 838
837static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 839static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
838{ 840{
839 Reg src = ra_alloc1(as, ir->op2, RSET_GPR); 841 if (ir->r == RID_SINK) { /* Sink store. */
840 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, 842 asm_snap_prep(as);
841 rset_exclude(RSET_GPR, src), ofs); 843 } else {
844 Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
845 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
846 rset_exclude(RSET_GPR, src), ofs);
847 }
842} 848}
843 849
844static void asm_ahuvload(ASMState *as, IRIns *ir) 850static void asm_ahuvload(ASMState *as, IRIns *ir)
@@ -876,21 +882,25 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
876 882
877static void asm_ahustore(ASMState *as, IRIns *ir) 883static void asm_ahustore(ASMState *as, IRIns *ir)
878{ 884{
879 RegSet allow = RSET_GPR; 885 if (ir->r == RID_SINK) { /* Sink store. */
880 Reg idx, src = RID_NONE, type = RID_NONE; 886 asm_snap_prep(as);
881 int32_t ofs = 0; 887 } else {
882 int hiop = ((ir+1)->o == IR_HIOP); 888 RegSet allow = RSET_GPR;
883 if (!irt_ispri(ir->t)) { 889 Reg idx, src = RID_NONE, type = RID_NONE;
884 src = ra_alloc1(as, ir->op2, allow); 890 int32_t ofs = 0;
885 rset_clear(allow, src); 891 int hiop = ((ir+1)->o == IR_HIOP);
892 if (!irt_ispri(ir->t)) {
893 src = ra_alloc1(as, ir->op2, allow);
894 rset_clear(allow, src);
895 }
896 if (hiop)
897 type = ra_alloc1(as, (ir+1)->op2, allow);
898 else
899 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
900 idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type));
901 if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs);
902 emit_lso(as, ARMI_STR, type, idx, ofs+4);
886 } 903 }
887 if (hiop)
888 type = ra_alloc1(as, (ir+1)->op2, allow);
889 else
890 type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
891 idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type));
892 if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs);
893 emit_lso(as, ARMI_STR, type, idx, ofs+4);
894} 904}
895 905
896static void asm_sload(ASMState *as, IRIns *ir) 906static void asm_sload(ASMState *as, IRIns *ir)
@@ -1382,7 +1392,10 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1382 asm_fpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); 1392 asm_fpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO);
1383 return; 1393 return;
1384 } else if ((ir-1)->o == IR_XSTORE) { 1394 } else if ((ir-1)->o == IR_XSTORE) {
1385 asm_xstore(as, ir, 4); 1395 if ((ir-1)->r == RID_SINK)
1396 asm_snap_prep(as);
1397 else
1398 asm_xstore(as, ir, 4);
1386 return; 1399 return;
1387 } 1400 }
1388 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 1401 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index b42f9f9a..def3eb2a 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -769,14 +769,18 @@ nolo:
769 769
770static void asm_newref(ASMState *as, IRIns *ir) 770static void asm_newref(ASMState *as, IRIns *ir)
771{ 771{
772 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; 772 if (ir->r == RID_SINK) { /* Sink newref. */
773 IRRef args[3]; 773 return;
774 args[0] = ASMREF_L; /* lua_State *L */ 774 } else {
775 args[1] = ir->op1; /* GCtab *t */ 775 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
776 args[2] = ASMREF_TMP1; /* cTValue *key */ 776 IRRef args[3];
777 asm_setupresult(as, ir, ci); /* TValue * */ 777 args[0] = ASMREF_L; /* lua_State *L */
778 asm_gencall(as, ci, args); 778 args[1] = ir->op1; /* GCtab *t */
779 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); 779 args[2] = ASMREF_TMP1; /* cTValue *key */
780 asm_setupresult(as, ir, ci); /* TValue * */
781 asm_gencall(as, ci, args);
782 asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
783 }
780} 784}
781 785
782static void asm_uref(ASMState *as, IRIns *ir) 786static void asm_uref(ASMState *as, IRIns *ir)
@@ -912,9 +916,14 @@ static void asm_xload(ASMState *as, IRIns *ir)
912 916
913static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 917static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
914{ 918{
915 Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); 919 if (ir->r == RID_SINK) { /* Sink store. */
916 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, 920 asm_snap_prep(as);
917 rset_exclude(RSET_GPR, src), ofs); 921 return;
922 } else {
923 Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
924 asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
925 rset_exclude(RSET_GPR, src), ofs);
926 }
918} 927}
919 928
920static void asm_ahuvload(ASMState *as, IRIns *ir) 929static void asm_ahuvload(ASMState *as, IRIns *ir)
@@ -947,6 +956,10 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
947 RegSet allow = RSET_GPR; 956 RegSet allow = RSET_GPR;
948 Reg idx, src = RID_NONE, type = RID_NONE; 957 Reg idx, src = RID_NONE, type = RID_NONE;
949 int32_t ofs = 0; 958 int32_t ofs = 0;
959 if (ir->r == RID_SINK) { /* Sink store. */
960 asm_snap_prep(as);
961 return;
962 }
950 if (irt_isnum(ir->t)) { 963 if (irt_isnum(ir->t)) {
951 src = ra_alloc1(as, ir->op2, RSET_FPR); 964 src = ra_alloc1(as, ir->op2, RSET_FPR);
952 } else { 965 } else {
@@ -1561,8 +1574,12 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1561 return; 1574 return;
1562 } else if ((ir-1)->o == IR_XSTORE) { 1575 } else if ((ir-1)->o == IR_XSTORE) {
1563 as->curins--; /* Handle both stores here. */ 1576 as->curins--; /* Handle both stores here. */
1564 asm_xstore(as, ir, LJ_LE ? 4 : 0); 1577 if ((ir-1)->r == RID_SINK) {
1565 asm_xstore(as, ir-1, LJ_LE ? 0 : 4); 1578 asm_snap_prep(as);
1579 } else {
1580 asm_xstore(as, ir, LJ_LE ? 4 : 0);
1581 asm_xstore(as, ir-1, LJ_LE ? 0 : 4);
1582 }
1566 return; 1583 return;
1567 } 1584 }
1568 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 1585 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index 5d538fc8..142ef212 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -773,6 +773,8 @@ static void asm_newref(ASMState *as, IRIns *ir)
773{ 773{
774 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; 774 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
775 IRRef args[3]; 775 IRRef args[3];
776 if (ir->r == RID_SINK) /* Sink newref. */
777 return;
776 args[0] = ASMREF_L; /* lua_State *L */ 778 args[0] = ASMREF_L; /* lua_State *L */
777 args[1] = ir->op1; /* GCtab *t */ 779 args[1] = ir->op1; /* GCtab *t */
778 args[2] = ASMREF_TMP1; /* cTValue *key */ 780 args[2] = ASMREF_TMP1; /* cTValue *key */
@@ -892,12 +894,16 @@ static void asm_fload(ASMState *as, IRIns *ir)
892 894
893static void asm_fstore(ASMState *as, IRIns *ir) 895static void asm_fstore(ASMState *as, IRIns *ir)
894{ 896{
895 Reg src = ra_alloc1(as, ir->op2, RSET_GPR); 897 if (ir->r == RID_SINK) { /* Sink store. */
896 IRIns *irf = IR(ir->op1); 898 asm_snap_prep(as);
897 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); 899 } else {
898 int32_t ofs = field_ofs[irf->op2]; 900 Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
899 PPCIns pi = asm_fxstoreins(ir); 901 IRIns *irf = IR(ir->op1);
900 emit_tai(as, pi, src, idx, ofs); 902 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
903 int32_t ofs = field_ofs[irf->op2];
904 PPCIns pi = asm_fxstoreins(ir);
905 emit_tai(as, pi, src, idx, ofs);
906 }
901} 907}
902 908
903static void asm_xload(ASMState *as, IRIns *ir) 909static void asm_xload(ASMState *as, IRIns *ir)
@@ -912,6 +918,10 @@ static void asm_xload(ASMState *as, IRIns *ir)
912static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) 918static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
913{ 919{
914 IRIns *irb; 920 IRIns *irb;
921 if (ir->r == RID_SINK) { /* Sink store. */
922 asm_snap_prep(as);
923 return;
924 }
915 if (ofs == 0 && mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP && 925 if (ofs == 0 && mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP &&
916 ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) { 926 ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) {
917 /* Fuse BSWAP with XSTORE to stwbrx. */ 927 /* Fuse BSWAP with XSTORE to stwbrx. */
@@ -968,6 +978,10 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
968 RegSet allow = RSET_GPR; 978 RegSet allow = RSET_GPR;
969 Reg idx, src = RID_NONE, type = RID_NONE; 979 Reg idx, src = RID_NONE, type = RID_NONE;
970 int32_t ofs = AHUREF_LSX; 980 int32_t ofs = AHUREF_LSX;
981 if (ir->r == RID_SINK) { /* Sink store. */
982 asm_snap_prep(as);
983 return;
984 }
971 if (irt_isnum(ir->t)) { 985 if (irt_isnum(ir->t)) {
972 src = ra_alloc1(as, ir->op2, RSET_FPR); 986 src = ra_alloc1(as, ir->op2, RSET_FPR);
973 } else { 987 } else {
@@ -1747,8 +1761,12 @@ static void asm_hiop(ASMState *as, IRIns *ir)
1747 return; 1761 return;
1748 } else if ((ir-1)->o == IR_XSTORE) { 1762 } else if ((ir-1)->o == IR_XSTORE) {
1749 as->curins--; /* Handle both stores here. */ 1763 as->curins--; /* Handle both stores here. */
1750 asm_xstore(as, ir, 0); 1764 if ((ir-1)->r == RID_SINK) {
1751 asm_xstore(as, ir-1, 4); 1765 asm_snap_prep(as);
1766 } else {
1767 asm_xstore(as, ir, 0);
1768 asm_xstore(as, ir-1, 4);
1769 }
1752 return; 1770 return;
1753 } 1771 }
1754 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 1772 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 4537e1d5..ae14b3b6 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -1155,6 +1155,8 @@ static void asm_newref(ASMState *as, IRIns *ir)
1155 IRRef args[3]; 1155 IRRef args[3];
1156 IRIns *irkey; 1156 IRIns *irkey;
1157 Reg tmp; 1157 Reg tmp;
1158 if (ir->r == RID_SINK) /* Sink newref. */
1159 return;
1158 args[0] = ASMREF_L; /* lua_State *L */ 1160 args[0] = ASMREF_L; /* lua_State *L */
1159 args[1] = ir->op1; /* GCtab *t */ 1161 args[1] = ir->op1; /* GCtab *t */
1160 args[2] = ASMREF_TMP1; /* cTValue *key */ 1162 args[2] = ASMREF_TMP1; /* cTValue *key */
@@ -1259,6 +1261,10 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
1259 RegSet allow = RSET_GPR; 1261 RegSet allow = RSET_GPR;
1260 Reg src = RID_NONE, osrc = RID_NONE; 1262 Reg src = RID_NONE, osrc = RID_NONE;
1261 int32_t k = 0; 1263 int32_t k = 0;
1264 if (ir->r == RID_SINK) { /* Sink store. */
1265 asm_snap_prep(as);
1266 return;
1267 }
1262 /* The IRT_I16/IRT_U16 stores should never be simplified for constant 1268 /* The IRT_I16/IRT_U16 stores should never be simplified for constant
1263 ** values since mov word [mem], imm16 has a length-changing prefix. 1269 ** values since mov word [mem], imm16 has a length-changing prefix.
1264 */ 1270 */
@@ -1372,6 +1378,10 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
1372 1378
1373static void asm_ahustore(ASMState *as, IRIns *ir) 1379static void asm_ahustore(ASMState *as, IRIns *ir)
1374{ 1380{
1381 if (ir->r == RID_SINK) { /* Sink store. */
1382 asm_snap_prep(as);
1383 return;
1384 }
1375 if (irt_isnum(ir->t)) { 1385 if (irt_isnum(ir->t)) {
1376 Reg src = ra_alloc1(as, ir->op2, RSET_FPR); 1386 Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
1377 asm_fuseahuref(as, ir->op1, RSET_GPR); 1387 asm_fuseahuref(as, ir->op1, RSET_GPR);
@@ -2251,7 +2261,10 @@ static void asm_hiop(ASMState *as, IRIns *ir)
2251 asm_comp_int64(as, ir); 2261 asm_comp_int64(as, ir);
2252 return; 2262 return;
2253 } else if ((ir-1)->o == IR_XSTORE) { 2263 } else if ((ir-1)->o == IR_XSTORE) {
2254 asm_fxstore(as, ir); 2264 if ((ir-1)->r == RID_SINK)
2265 asm_snap_prep(as);
2266 else
2267 asm_fxstore(as, ir);
2255 return; 2268 return;
2256 } 2269 }
2257 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ 2270 if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index 81d522e8..a17e2065 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -154,6 +154,7 @@ LJ_FUNC void lj_opt_split(jit_State *J);
154#else 154#else
155#define lj_opt_split(J) UNUSED(J) 155#define lj_opt_split(J) UNUSED(J)
156#endif 156#endif
157LJ_FUNC void lj_opt_sink(jit_State *J);
157 158
158#endif 159#endif
159 160
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 28cdd17a..517b3264 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -63,19 +63,20 @@
63#define JIT_F_OPT_NARROW 0x00200000 63#define JIT_F_OPT_NARROW 0x00200000
64#define JIT_F_OPT_LOOP 0x00400000 64#define JIT_F_OPT_LOOP 0x00400000
65#define JIT_F_OPT_ABC 0x00800000 65#define JIT_F_OPT_ABC 0x00800000
66#define JIT_F_OPT_FUSE 0x01000000 66#define JIT_F_OPT_SINK 0x01000000
67#define JIT_F_OPT_FUSE 0x02000000
67 68
68/* Optimizations names for -O. Must match the order above. */ 69/* Optimizations names for -O. Must match the order above. */
69#define JIT_F_OPT_FIRST JIT_F_OPT_FOLD 70#define JIT_F_OPT_FIRST JIT_F_OPT_FOLD
70#define JIT_F_OPTSTRING \ 71#define JIT_F_OPTSTRING \
71 "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4fuse" 72 "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse"
72 73
73/* Optimization levels set a fixed combination of flags. */ 74/* Optimization levels set a fixed combination of flags. */
74#define JIT_F_OPT_0 0 75#define JIT_F_OPT_0 0
75#define JIT_F_OPT_1 (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE) 76#define JIT_F_OPT_1 (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE)
76#define JIT_F_OPT_2 (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP) 77#define JIT_F_OPT_2 (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP)
77#define JIT_F_OPT_3 \ 78#define JIT_F_OPT_3 (JIT_F_OPT_2|\
78 (JIT_F_OPT_2|JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_FUSE) 79 JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE)
79#define JIT_F_OPT_DEFAULT JIT_F_OPT_3 80#define JIT_F_OPT_DEFAULT JIT_F_OPT_3
80 81
81#if LJ_TARGET_WINDOWS || LJ_64 82#if LJ_TARGET_WINDOWS || LJ_64
diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c
new file mode 100644
index 00000000..80ab5b6e
--- /dev/null
+++ b/src/lj_opt_sink.c
@@ -0,0 +1,244 @@
1/*
2** SINK: Allocation Sinking and Store Sinking.
3** Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6#define lj_opt_sink_c
7#define LUA_CORE
8
9#include "lj_obj.h"
10
11#if LJ_HASJIT
12
13#include "lj_ir.h"
14#include "lj_jit.h"
15#include "lj_iropt.h"
16#include "lj_target.h"
17
18/* Some local macros to save typing. Undef'd at the end. */
19#define IR(ref) (&J->cur.ir[(ref)])
20
21/* Check whether the store ref points to an eligible allocation. */
22static IRIns *sink_checkalloc(jit_State *J, IRIns *irs)
23{
24 IRIns *ir = IR(irs->op1);
25 if (!irref_isk(ir->op2))
26 return NULL; /* Non-constant key. */
27 if (ir->o == IR_HREFK || ir->o == IR_AREF)
28 ir = IR(ir->op1);
29 else if (!(ir->o == IR_HREF || ir->o == IR_NEWREF ||
30 ir->o == IR_FREF || ir->o == IR_ADD))
31 return NULL; /* Unhandled reference type (for XSTORE). */
32 ir = IR(ir->op1);
33 if (!(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW))
34 return NULL; /* Not an allocation. */
35 if (ir + 255 < irs)
36 return NULL; /* Out of range. */
37 return ir; /* Return allocation. */
38}
39
40/* Recursively check whether a value depends on a PHI. */
41static int sink_phidep(jit_State *J, IRRef ref)
42{
43 IRIns *ir = IR(ref);
44 if (irt_isphi(ir->t)) return 1;
45 if (ir->op1 >= REF_FIRST && sink_phidep(J, ir->op1)) return 1;
46 if (ir->op2 >= REF_FIRST && sink_phidep(J, ir->op2)) return 1;
47 return 0;
48}
49
50/* Check whether a value is a sinkable PHI or a non-PHI. */
51static int sink_checkphi(jit_State *J, IRIns *ira, IRRef ref)
52{
53 if (ref >= REF_FIRST) {
54 IRIns *ir = IR(ref);
55 if (irt_isphi(ir->t) || (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT &&
56 irt_isphi(IR(ir->op1)->t))) {
57 ira->prev++;
58 return 1; /* Sinkable PHI. */
59 }
60 return !sink_phidep(J, ref); /* Must be a non-PHI then. */
61 }
62 return 1; /* Constant (non-PHI). */
63}
64
/* Mark non-sinkable allocations using single-pass backward propagation.
**
** Roots for the marking process are:
** - Some PHIs or snapshots (see below).
** - Non-PHI, non-constant values stored to PHI allocations.
** - All guards.
** - Any remaining loads not eliminated by store-to-load forwarding.
** - Stores with non-constant keys.
** - All stored values.
**
** The IRT_MARK bit on an instruction's type flags it (and, transitively,
** its operands) as non-sinkable.  The walk runs backwards from the last
** instruction down to IR_BASE, so marks propagate from uses to defs in
** a single pass.
*/
static void sink_mark_ins(jit_State *J)
{
  IRIns *ir, *irlast = IR(J->cur.nins-1);
  for (ir = irlast ; ; ir--) {
    switch (ir->o) {
    case IR_BASE:
      return;  /* Finished. */
    case IR_CALLL:  /* IRCALL_lj_tab_len */
    case IR_ALOAD: case IR_HLOAD: case IR_XLOAD:
      irt_setmark(IR(ir->op1)->t);  /* Mark ref for remaining loads. */
      break;
    case IR_FLOAD:
      /* Metatable loads keep the table alive even if otherwise unused. */
      if (irt_ismarked(ir->t) || ir->op2 == IRFL_TAB_META)
	irt_setmark(IR(ir->op1)->t);  /* Mark table for remaining loads. */
      break;
    case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: {
      IRIns *ira = sink_checkalloc(J, ir);
      /* A store to a PHI allocation is only ok if the stored value is a
      ** sinkable PHI or a PHI-independent value (sink_checkphi).
      */
      if (!ira || (irt_isphi(ira->t) && !sink_checkphi(J, ira, ir->op2)))
	irt_setmark(IR(ir->op1)->t);  /* Mark ineligible ref. */
      irt_setmark(IR(ir->op2)->t);  /* Mark stored value. */
      break;
      }
#if LJ_HASFFI
    case IR_CNEWI:
      /* CNEWI embeds its payload in op2 (plus a HIOP on 32 bit targets),
      ** so the PHI check applies to the allocation itself.
      */
      if (irt_isphi(ir->t) &&
	  (!sink_checkphi(J, ir, ir->op2) ||
	   (LJ_32 && ir+1 < irlast && (ir+1)->o == IR_HIOP &&
	    !sink_checkphi(J, ir, (ir+1)->op2))))
	irt_setmark(ir->t);  /* Mark ineligible allocation. */
      /* fallthrough */
#endif
    case IR_USTORE:
      irt_setmark(IR(ir->op2)->t);  /* Mark stored value. */
      break;
#if LJ_HASFFI
    case IR_CALLXS:
#endif
    case IR_CALLS:
      irt_setmark(IR(ir->op1)->t);  /* Mark (potentially) stored values. */
      break;
    case IR_PHI: {
      IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
      /* The prev field is reused as the PHI value count, incremented by
      ** sink_checkphi and compared in sink_remark_phi.
      */
      irl->prev = irr->prev = 0;  /* Clear PHI value counts. */
      /* Only PHIs over matching allocation opcodes may stay sinkable. */
      if (irl->o == irr->o &&
	  (irl->o == IR_TNEW || irl->o == IR_TDUP ||
	   (LJ_HASFFI && (irl->o == IR_CNEW || irl->o == IR_CNEWI))))
	break;
      irt_setmark(irl->t);
      irt_setmark(irr->t);
      break;
      }
    default:
      if (irt_ismarked(ir->t) || irt_isguard(ir->t)) {  /* Propagate mark. */
	if (ir->op1 >= REF_FIRST) irt_setmark(IR(ir->op1)->t);
	if (ir->op2 >= REF_FIRST) irt_setmark(IR(ir->op2)->t);
      }
      break;
    }
  }
}
135
136/* Mark all instructions referenced by a snapshot. */
137static void sink_mark_snap(jit_State *J, SnapShot *snap)
138{
139 SnapEntry *map = &J->cur.snapmap[snap->mapofs];
140 MSize n, nent = snap->nent;
141 for (n = 0; n < nent; n++) {
142 IRRef ref = snap_ref(map[n]);
143 if (!irref_isk(ref))
144 irt_setmark(IR(ref)->t);
145 }
146}
147
/* Iteratively remark PHI refs with differing marks or PHI value counts.
**
** Runs to a fixpoint: marking one side of a PHI may invalidate another
** PHI that shares a value, so the PHI list (which sits at the end of the
** IR) is rescanned until no mark difference remains.
*/
static void sink_remark_phi(jit_State *J)
{
  IRIns *ir;
  int remark;
  do {
    remark = 0;
    for (ir = IR(J->cur.nins-1); ir->o == IR_PHI; ir--) {
      IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
      /* Only a mark mismatch forces another iteration; a pure count
      ** mismatch marks both sides but cannot cascade further.
      */
      if (((irl->t.irt ^ irr->t.irt) & IRT_MARK))
	remark = 1;
      else if (irl->prev == irr->prev)
	continue;  /* Same marks and same PHI value counts: still ok. */
      irt_setmark(IR(ir->op1)->t);
      irt_setmark(IR(ir->op2)->t);
    }
  } while (remark);
}
166
/* Sweep instructions and mark sunken allocations and stores.
**
** Unmarked allocations and their dependent stores get their prev field
** set to a RID_SINK register spec, which the assembler recognizes and
** skips.  For sunk stores the spill slot encodes the (<= 255) distance
** back to the owning allocation.  Everything else is reset to
** REGSP_INIT and has its mark bit cleared for the register allocator.
*/
static void sink_sweep_ins(jit_State *J)
{
  IRIns *ir, *irfirst = IR(J->cur.nk);
  for (ir = IR(J->cur.nins-1) ; ir >= irfirst; ir--) {
    switch (ir->o) {
    case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: {
      IRIns *ira = sink_checkalloc(J, ir);
      if (ira && !irt_ismarked(ira->t))
	ir->prev = REGSP(RID_SINK, (int)(ir - ira));  /* Distance to alloc. */
      else
	ir->prev = REGSP_INIT;
      break;
      }
    case IR_NEWREF:
      if (!irt_ismarked(ir->t)) {
	ir->prev = REGSP(RID_SINK, 0);
      } else {
	irt_clearmark(ir->t);
	ir->prev = REGSP_INIT;
      }
      break;
#if LJ_HASFFI
    case IR_CNEW: case IR_CNEWI:
#endif
    case IR_TNEW: case IR_TDUP:
      if (!irt_ismarked(ir->t)) {
	/* A sunk allocation can no longer fail its guard on-trace. */
	ir->t.irt &= ~IRT_GUARD;
	ir->prev = REGSP(RID_SINK, 0);
      } else {
	irt_clearmark(ir->t);
	ir->prev = REGSP_INIT;
      }
      break;
    case IR_PHI: {
      IRIns *ira = IR(ir->op2);
      /* Sink the PHI itself iff its (right) allocation operand is sunk. */
      if (!irt_ismarked(ira->t) &&
	  (ira->o == IR_TNEW || ira->o == IR_TDUP ||
	   (LJ_HASFFI && (ira->o == IR_CNEW || ira->o == IR_CNEWI)))) {
	ir->prev = REGSP(RID_SINK, 0);
      } else {
	ir->prev = REGSP_INIT;
      }
      break;
      }
    default:
      irt_clearmark(ir->t);
      ir->prev = REGSP_INIT;
      break;
    }
  }
  IR(REF_BASE)->prev = 1;  /* Signal SINK flags to assembler. */
}
220
221/* Allocation sinking and store sinking.
222**
223** 1. Mark all non-sinkable allocations.
224** 2. Then sink all remaining allocations and the related stores.
225*/
226void lj_opt_sink(jit_State *J)
227{
228 const uint32_t need = (JIT_F_OPT_SINK|JIT_F_OPT_FWD|
229 JIT_F_OPT_DCE|JIT_F_OPT_CSE|JIT_F_OPT_FOLD);
230 if ((J->flags & need) == need &&
231 (J->chain[IR_TNEW] || J->chain[IR_TDUP] ||
232 (LJ_HASFFI && (J->chain[IR_CNEW] || J->chain[IR_CNEWI])))) {
233 if (!J->loopref)
234 sink_mark_snap(J, &J->cur.snap[J->cur.nsnap-1]);
235 sink_mark_ins(J);
236 if (J->loopref)
237 sink_remark_phi(J);
238 sink_sweep_ins(J);
239 }
240}
241
242#undef IR
243
244#endif
diff --git a/src/lj_snap.c b/src/lj_snap.c
index 33edc8a6..1e6f10d0 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -11,6 +11,7 @@
11#if LJ_HASJIT 11#if LJ_HASJIT
12 12
13#include "lj_gc.h" 13#include "lj_gc.h"
14#include "lj_tab.h"
14#include "lj_state.h" 15#include "lj_state.h"
15#include "lj_frame.h" 16#include "lj_frame.h"
16#include "lj_bc.h" 17#include "lj_bc.h"
@@ -20,10 +21,17 @@
20#include "lj_trace.h" 21#include "lj_trace.h"
21#include "lj_snap.h" 22#include "lj_snap.h"
22#include "lj_target.h" 23#include "lj_target.h"
24#if LJ_HASFFI
25#include "lj_ctype.h"
26#include "lj_cdata.h"
27#endif
23 28
24/* Some local macros to save typing. Undef'd at the end. */ 29/* Some local macros to save typing. Undef'd at the end. */
25#define IR(ref) (&J->cur.ir[(ref)]) 30#define IR(ref) (&J->cur.ir[(ref)])
26 31
32/* Pass IR on to next optimization in chain (FOLD). */
33#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
34
27/* Emit raw IR without passing through optimizations. */ 35/* Emit raw IR without passing through optimizations. */
28#define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J)) 36#define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
29 37
@@ -370,6 +378,31 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir)
370 } 378 }
371} 379}
372 380
381/* De-duplicate parent reference. */
382static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
383{
384 MSize j;
385 for (j = 0; j < nmax; j++)
386 if (snap_ref(map[j]) == ref)
387 return J->slot[snap_slot(map[j])];
388 return 0;
389}
390
/* Emit parent reference with de-duplication.
**
** Three cases: constants are replayed directly; refs with no register
** or spill slot in the parent (i.e. sunk) yield 0 so the caller knows
** to replay them; everything else becomes an IR_PVAL referencing the
** parent instruction, reusing a previously emitted one when the bloom
** filter and map lookup find a duplicate.
*/
static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
		      BloomFilter seen, IRRef ref)
{
  IRIns *ir = &T->ir[ref];
  TRef tr;
  if (irref_isk(ref))
    tr = snap_replay_const(J, ir);
  else if (!regsp_used(ir->prev))
    tr = 0;  /* Sunk in the parent: caller must handle this. */
  else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
    tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
  return tr;
}
405
373/* Replay snapshot state to setup side trace. */ 406/* Replay snapshot state to setup side trace. */
374void lj_snap_replay(jit_State *J, GCtrace *T) 407void lj_snap_replay(jit_State *J, GCtrace *T)
375{ 408{
@@ -377,6 +410,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
377 SnapEntry *map = &T->snapmap[snap->mapofs]; 410 SnapEntry *map = &T->snapmap[snap->mapofs];
378 MSize n, nent = snap->nent; 411 MSize n, nent = snap->nent;
379 BloomFilter seen = 0; 412 BloomFilter seen = 0;
413 int pass23 = 0;
380 J->framedepth = 0; 414 J->framedepth = 0;
381 /* Emit IR for slots inherited from parent snapshot. */ 415 /* Emit IR for slots inherited from parent snapshot. */
382 for (n = 0; n < nent; n++) { 416 for (n = 0; n < nent; n++) {
@@ -386,21 +420,18 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
386 IRIns *ir = &T->ir[ref]; 420 IRIns *ir = &T->ir[ref];
387 TRef tr; 421 TRef tr;
388 /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */ 422 /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
389 if (bloomtest(seen, ref)) { 423 if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
390 MSize j; 424 goto setslot;
391 for (j = 0; j < n; j++)
392 if (snap_ref(map[j]) == ref) {
393 tr = J->slot[snap_slot(map[j])];
394 goto setslot;
395 }
396 }
397 bloomset(seen, ref); 425 bloomset(seen, ref);
398 if (irref_isk(ref)) { 426 if (irref_isk(ref)) {
399 tr = snap_replay_const(J, ir); 427 tr = snap_replay_const(J, ir);
428 } else if (!regsp_used(ir->prev)) {
429 pass23 = 1;
430 lua_assert(s != 0);
431 tr = s;
400 } else { 432 } else {
401 IRType t = irt_type(ir->t); 433 IRType t = irt_type(ir->t);
402 uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; 434 uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
403 lua_assert(regsp_used(ir->prev));
404 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; 435 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
405 if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); 436 if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
406 tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); 437 tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
@@ -411,13 +442,126 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
411 if ((sn & SNAP_FRAME)) 442 if ((sn & SNAP_FRAME))
412 J->baseslot = s+1; 443 J->baseslot = s+1;
413 } 444 }
445 if (pass23) {
446 IRIns *irlast = &T->ir[(snap+1)->ref];
447 lua_assert(J->exitno+1 < T->nsnap);
448 pass23 = 0;
449 /* Emit dependent PVALs. */
450 for (n = 0; n < nent; n++) {
451 SnapEntry sn = map[n];
452 IRRef refp = snap_ref(sn);
453 IRIns *ir = &T->ir[refp];
454 if (regsp_reg(ir->r) == RID_SUNK) {
455 if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
456 pass23 = 1;
457 lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
458 ir->o == IR_CNEW || ir->o == IR_CNEWI);
459 if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
460 if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
461 if (LJ_HASFFI && ir->o == IR_CNEWI) {
462 if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
463 snap_pref(J, T, map, nent, seen, (ir+1)->op2);
464 } else {
465 IRIns *irs;
466 for (irs = ir+1; irs < irlast; irs++)
467 if (irs->r == RID_SINK && ir + irs->s == irs) {
468 if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
469 snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
470 else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
471 irs+1 < irlast && (irs+1)->o == IR_HIOP)
472 snap_pref(J, T, map, nent, seen, (irs+1)->op2);
473 }
474 }
475 } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
476 lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
477 J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
478 }
479 }
480 /* Replay sunk instructions. */
481 for (n = 0; pass23 && n < nent; n++) {
482 SnapEntry sn = map[n];
483 IRRef refp = snap_ref(sn);
484 IRIns *ir = &T->ir[refp];
485 if (regsp_reg(ir->r) == RID_SUNK) {
486 TRef op1, op2;
487 if (J->slot[snap_slot(sn)] != snap_slot(sn)) { /* De-dup allocs. */
488 J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
489 continue;
490 }
491 op1 = ir->op1;
492 if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
493 op2 = ir->op2;
494 if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
495 if (LJ_HASFFI && ir->o == IR_CNEWI) {
496 if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
497 lj_needsplit(J); /* Emit joining HIOP. */
498 op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
499 snap_pref(J, T, map, nent, seen, (ir+1)->op2));
500 }
501 J->slot[snap_slot(sn)] = emitir(ir->ot, op1, op2);
502 } else {
503 IRIns *irs;
504 TRef tr = emitir(ir->ot, op1, op2);
505 J->slot[snap_slot(sn)] = tr;
506 for (irs = ir+1; irs < irlast; irs++)
507 if (irs->r == RID_SINK && ir + irs->s == irs) {
508 IRIns *irr = &T->ir[irs->op1];
509 TRef val, key = irr->op2, tmp = tr;
510 if (irr->o != IR_FREF) {
511 IRIns *irk = &T->ir[key];
512 if (irr->o == IR_HREFK)
513 key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
514 irk->op2);
515 else
516 key = snap_replay_const(J, irk);
517 if (irr->o == IR_HREFK || irr->o == IR_AREF) {
518 IRIns *irf = &T->ir[irr->op1];
519 tmp = emitir(irf->ot, tmp, irf->op2);
520 }
521 }
522 tmp = emitir(irr->ot, tmp, key);
523 val = snap_pref(J, T, map, nent, seen, irs->op2);
524 if (val == 0) {
525 IRIns *irc = &T->ir[irs->op2];
526 lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
527 val = snap_pref(J, T, map, nent, seen, irc->op1);
528 val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
529 } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
530 irs+1 < irlast && (irs+1)->o == IR_HIOP) {
531 IRType t = IRT_I64;
532 if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
533 t = IRT_NUM;
534 if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
535 uint64_t k = (uint32_t)T->ir[irs->op2].i +
536 ((uint64_t)T->ir[(irs+1)->op2].i << 32);
537 val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM,
538 lj_ir_k64_find(J, k));
539 } else {
540 val = emitir_raw(IRT(IR_HIOP, t), val,
541 snap_pref(J, T, map, nent, seen, (irs+1)->op2));
542 }
543 tmp = emitir(IRT(irs->o, t), tmp, val);
544 continue;
545 }
546 tmp = emitir(irs->ot, tmp, val);
547 }
548 }
549 }
550 }
551 }
414 J->base = J->slot + J->baseslot; 552 J->base = J->slot + J->baseslot;
415 J->maxslot = snap->nslots - J->baseslot; 553 J->maxslot = snap->nslots - J->baseslot;
416 lj_snap_add(J); 554 lj_snap_add(J);
555 if (pass23) /* Need explicit GC step _after_ initial snapshot. */
556 emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
417} 557}
418 558
419/* -- Snapshot restore ---------------------------------------------------- */ 559/* -- Snapshot restore ---------------------------------------------------- */
420 560
561static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
562 SnapNo snapno, BloomFilter rfilt,
563 IRIns *ir, TValue *o);
564
421/* Restore a value from the trace exit state. */ 565/* Restore a value from the trace exit state. */
422static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, 566static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
423 SnapNo snapno, BloomFilter rfilt, 567 SnapNo snapno, BloomFilter rfilt,
@@ -450,8 +594,12 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
450 } 594 }
451 } else { /* Restore from register. */ 595 } else { /* Restore from register. */
452 Reg r = regsp_reg(rs); 596 Reg r = regsp_reg(rs);
453 lua_assert(ra_hasreg(r)); 597 if (ra_noreg(r)) {
454 if (irt_isinteger(t)) { 598 lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
599 snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
600 if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
601 return;
602 } else if (irt_isinteger(t)) {
455 setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]); 603 setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
456#if !LJ_SOFTFP 604#if !LJ_SOFTFP
457 } else if (irt_isnum(t)) { 605 } else if (irt_isnum(t)) {
@@ -468,6 +616,148 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
468 } 616 }
469} 617}
470 618
#if LJ_HASFFI
/* Restore raw data from the trace exit state.
**
** Copies sz (1/2/4/8) bytes for ref into dst, reading from the constant
** pool, a spill slot, or a GPR/FPR of the exit state.  Handles renamed
** refs, big-endian sub-word adjustment and PPC's double-only FPRs.
*/
static void snap_restoredata(GCtrace *T, ExitState *ex,
			     SnapNo snapno, BloomFilter rfilt,
			     IRRef ref, void *dst, CTSize sz)
{
  IRIns *ir = &T->ir[ref];
  RegSP rs = ir->prev;
  int32_t *src;
  union { uint64_t u64; float f; } tmp;
  if (irref_isk(ref)) {
    if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
      src = mref(ir->ptr, int32_t);  /* 64 bit constants live off-IR. */
    } else if (sz == 8) {
      tmp.u64 = (uint64_t)(uint32_t)ir->i;  /* Zero-extend 32 bit constant. */
      src = (int32_t *)&tmp.u64;
    } else {
      src = &ir->i;
    }
  } else {
    if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
      rs = snap_renameref(T, snapno, ref, rs);  /* Follow register rename. */
    if (ra_hasspill(regsp_spill(rs))) {
      src = &ex->spill[regsp_spill(rs)];
    } else {
      Reg r = regsp_reg(rs);
      if (ra_noreg(r)) {
	/* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
	lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
	/* Materialize the int source, then widen in place. */
	snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4);
	*(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
	return;
      }
      src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
#if !LJ_SOFTFP
      if (r >= RID_MAX_GPR) {
	src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
#if LJ_TARGET_PPC
	if (sz == 4) {  /* PPC FPRs are always doubles. */
	  tmp.f = (float)*(double *)src;
	  src = (int32_t *)&tmp.f;
	}
#else
	if (LJ_BE && sz == 4) src++;  /* Low word is the second one on BE. */
#endif
      }
#endif
    }
  }
  lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
  if (sz == 4) *(int32_t *)dst = *src;
  else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
  else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
  else *(int16_t *)dst = (int16_t)*src;
}
#endif
675
/* Unsink allocation from the trace exit state. Unsink sunk stores.
**
** Re-creates the sunk allocation (cdata or table) in o and replays all
** sunk stores that target it.  A store belongs to the allocation ir when
** its reg is RID_SINK and its spill slot encodes the distance back to ir
** (ir + irs->s == irs), mirroring sink_sweep_ins' encoding.
*/
static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
			SnapNo snapno, BloomFilter rfilt,
			IRIns *ir, TValue *o)
{
  lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
	     ir->o == IR_CNEW || ir->o == IR_CNEWI);
#if LJ_HASFFI
  if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
    CTState *cts = ctype_ctsG(J2G(J));
    CTypeID id = (CTypeID)T->ir[ir->op1].i;
    CTSize sz = lj_ctype_size(cts, id);
    GCcdata *cd = lj_cdata_new(cts, id, sz);
    setcdataV(J->L, o, cd);
    if (ir->o == IR_CNEWI) {
      /* Immediate cdata: payload is in op2 (+ HIOP on 32 bit targets). */
      uint8_t *p = (uint8_t *)cdataptr(cd);
      lua_assert(sz == 4 || sz == 8);
      if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
	snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4);
	if (LJ_BE) p += 4;
	sz = 4;
      }
      snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz);
    } else {
      /* Plain CNEW: replay all sunk XSTOREs into the fresh cdata. */
      IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
      for (irs = ir+1; irs < irlast; irs++)
	if (irs->r == RID_SINK && ir + irs->s == irs) {
	  IRIns *iro = &T->ir[T->ir[irs->op1].op2];
	  uint8_t *p = (uint8_t *)cd;
	  CTSize szs;
	  lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD);
	  lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64);
	  /* Derive the store size from the IR type of the store. */
	  if (irt_is64(irs->t)) szs = 8;
	  else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
	  else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
	  else szs = 4;
	  if (LJ_64 && iro->o == IR_KINT64)
	    p += (int64_t)ir_k64(iro)->u64;
	  else
	    p += iro->i;
	  lua_assert(p >= (uint8_t *)cdataptr(cd) &&
		     p + szs <= (uint8_t *)cdataptr(cd) + sz);
	  if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
	    lua_assert(szs == 4);
	    snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4);
	    if (LJ_BE) p += 4;
	  }
	  snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs);
	}
    }
  } else
#endif
  {
    IRIns *irs, *irlast;
    /* Re-create the table, then replay all sunk stores into it. */
    GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
				  lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
    settabV(J->L, o, t);
    irlast = &T->ir[T->snap[snapno].ref];
    for (irs = ir+1; irs < irlast; irs++)
      if (irs->r == RID_SINK && ir + irs->s == irs) {
	IRIns *irk = &T->ir[irs->op1];
	TValue tmp, *val;
	lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
		   irs->o == IR_FSTORE);
	if (irk->o == IR_FREF) {
	  /* FSTORE to an FREF: the only sunk field store is the metatable. */
	  lua_assert(irk->op2 == IRFL_TAB_META);
	  snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
	  /* NOBARRIER: The table is new (marked white). */
	  setgcref(t->metatable, obj2gco(tabV(&tmp)));
	} else {
	  irk = &T->ir[irk->op2];
	  if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
	  lj_ir_kvalue(J->L, &tmp, irk);  /* Materialize the constant key. */
	  val = lj_tab_set(J->L, t, &tmp);
	  /* NOBARRIER: The table is new (marked white). */
	  snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
	  if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
	    /* SOFTFP: combine the hi word from the paired HIOP. */
	    snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
	    val->u32.hi = tmp.u32.lo;
	  }
	}
      }
  }
}
760
471/* Restore interpreter state from exit state with the help of a snapshot. */ 761/* Restore interpreter state from exit state with the help of a snapshot. */
472const BCIns *lj_snap_restore(jit_State *J, void *exptr) 762const BCIns *lj_snap_restore(jit_State *J, void *exptr)
473{ 763{
@@ -500,10 +790,23 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
500 SnapEntry sn = map[n]; 790 SnapEntry sn = map[n];
501 if (!(sn & SNAP_NORESTORE)) { 791 if (!(sn & SNAP_NORESTORE)) {
502 TValue *o = &frame[snap_slot(sn)]; 792 TValue *o = &frame[snap_slot(sn)];
503 snap_restoreval(J, T, ex, snapno, rfilt, snap_ref(sn), o); 793 IRRef ref = snap_ref(sn);
794 IRIns *ir = &T->ir[ref];
795 if (ir->r == RID_SUNK) {
796 MSize j;
797 for (j = 0; j < n; j++)
798 if (snap_ref(map[j]) == ref) { /* De-duplicate sunk allocations. */
799 copyTV(L, o, &frame[snap_slot(map[j])]);
800 goto dupslot;
801 }
802 snap_unsink(J, T, ex, snapno, rfilt, ir, o);
803 dupslot:
804 continue;
805 }
806 snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
504 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { 807 if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
505 TValue tmp; 808 TValue tmp;
506 snap_restoreval(J, T, ex, snapno, rfilt, snap_ref(sn)+1, &tmp); 809 snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
507 o->u32.hi = tmp.u32.lo; 810 o->u32.hi = tmp.u32.lo;
508 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { 811 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
509 /* Overwrite tag with frame link. */ 812 /* Overwrite tag with frame link. */
@@ -528,5 +831,6 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
528 831
529#undef IR 832#undef IR
530#undef emitir_raw 833#undef emitir_raw
834#undef emitir
531 835
532#endif 836#endif
diff --git a/src/lj_target.h b/src/lj_target.h
index 13de8fc6..4808a38c 100644
--- a/src/lj_target.h
+++ b/src/lj_target.h
@@ -16,17 +16,19 @@ typedef uint32_t Reg;
16 16
17/* The hi-bit is NOT set for an allocated register. This means the value 17/* The hi-bit is NOT set for an allocated register. This means the value
18** can be directly used without masking. The hi-bit is set for a register 18** can be directly used without masking. The hi-bit is set for a register
19** allocation hint or for RID_INIT. 19** allocation hint or for RID_INIT, RID_SINK or RID_SUNK.
20*/ 20*/
21#define RID_NONE 0x80 21#define RID_NONE 0x80
22#define RID_MASK 0x7f 22#define RID_MASK 0x7f
23#define RID_INIT (RID_NONE|RID_MASK) 23#define RID_INIT (RID_NONE|RID_MASK)
24#define RID_SINK (RID_INIT-1)
25#define RID_SUNK (RID_INIT-2)
24 26
25#define ra_noreg(r) ((r) & RID_NONE) 27#define ra_noreg(r) ((r) & RID_NONE)
26#define ra_hasreg(r) (!((r) & RID_NONE)) 28#define ra_hasreg(r) (!((r) & RID_NONE))
27 29
28/* The ra_hashint() macro assumes a previous test for ra_noreg(). */ 30/* The ra_hashint() macro assumes a previous test for ra_noreg(). */
29#define ra_hashint(r) ((r) != RID_INIT) 31#define ra_hashint(r) ((r) < RID_SUNK)
30#define ra_gethint(r) ((Reg)((r) & RID_MASK)) 32#define ra_gethint(r) ((Reg)((r) & RID_MASK))
31#define ra_sethint(rr, r) rr = (uint8_t)((r)|RID_NONE) 33#define ra_sethint(rr, r) rr = (uint8_t)((r)|RID_NONE)
32#define ra_samehint(r1, r2) (ra_gethint((r1)^(r2)) == 0) 34#define ra_samehint(r1, r2) (ra_gethint((r1)^(r2)) == 0)
diff --git a/src/lj_trace.c b/src/lj_trace.c
index ad00dc67..240e7fc8 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -606,6 +606,7 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud)
606 J->loopref = J->chain[IR_LOOP]; /* Needed by assembler. */ 606 J->loopref = J->chain[IR_LOOP]; /* Needed by assembler. */
607 } 607 }
608 lj_opt_split(J); 608 lj_opt_split(J);
609 lj_opt_sink(J);
609 J->state = LJ_TRACE_ASM; 610 J->state = LJ_TRACE_ASM;
610 break; 611 break;
611 612
diff --git a/src/ljamalg.c b/src/ljamalg.c
index 1b58ceb4..b1124464 100644
--- a/src/ljamalg.c
+++ b/src/ljamalg.c
@@ -64,6 +64,7 @@
64#include "lj_opt_dce.c" 64#include "lj_opt_dce.c"
65#include "lj_opt_loop.c" 65#include "lj_opt_loop.c"
66#include "lj_opt_split.c" 66#include "lj_opt_split.c"
67#include "lj_opt_sink.c"
67#include "lj_mcode.c" 68#include "lj_mcode.c"
68#include "lj_snap.c" 69#include "lj_snap.c"
69#include "lj_record.c" 70#include "lj_record.c"