diff options
author | Mike Pall <mike> | 2012-07-02 23:47:12 +0200 |
---|---|---|
committer | Mike Pall <mike> | 2012-07-02 23:47:12 +0200 |
commit | 0af3f47ba01b9634f75a1aee38e318d74dab53d0 (patch) | |
tree | 1ab064739041d279a54f89f396ae03bc9cf89864 /src | |
parent | 79e1eaa73b1bb8dd0e2ea7aeaba8504f89e5ff94 (diff) | |
download | luajit-0af3f47ba01b9634f75a1aee38e318d74dab53d0.tar.gz luajit-0af3f47ba01b9634f75a1aee38e318d74dab53d0.tar.bz2 luajit-0af3f47ba01b9634f75a1aee38e318d74dab53d0.zip |
Add allocation sinking and store sinking optimization.
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile | 2 | ||||
-rw-r--r-- | src/Makefile.dep | 20 | ||||
-rw-r--r-- | src/jit/dump.lua | 15 | ||||
-rw-r--r-- | src/lj_asm.c | 67 | ||||
-rw-r--r-- | src/lj_asm_arm.h | 49 | ||||
-rw-r--r-- | src/lj_asm_mips.h | 43 | ||||
-rw-r--r-- | src/lj_asm_ppc.h | 34 | ||||
-rw-r--r-- | src/lj_asm_x86.h | 15 | ||||
-rw-r--r-- | src/lj_iropt.h | 1 | ||||
-rw-r--r-- | src/lj_jit.h | 9 | ||||
-rw-r--r-- | src/lj_opt_sink.c | 244 | ||||
-rw-r--r-- | src/lj_snap.c | 330 | ||||
-rw-r--r-- | src/lj_target.h | 6 | ||||
-rw-r--r-- | src/lj_trace.c | 1 | ||||
-rw-r--r-- | src/ljamalg.c | 1 |
15 files changed, 749 insertions, 88 deletions
diff --git a/src/Makefile b/src/Makefile index 6e0c7463..13344a77 100644 --- a/src/Makefile +++ b/src/Makefile | |||
@@ -443,7 +443,7 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \ | |||
443 | lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_api.o \ | 443 | lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_api.o \ |
444 | lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o \ | 444 | lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o \ |
445 | lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ | 445 | lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ |
446 | lj_opt_dce.o lj_opt_loop.o lj_opt_split.o \ | 446 | lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ |
447 | lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ | 447 | lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ |
448 | lj_asm.o lj_trace.o lj_gdbjit.o \ | 448 | lj_asm.o lj_trace.o lj_gdbjit.o \ |
449 | lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \ | 449 | lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \ |
diff --git a/src/Makefile.dep b/src/Makefile.dep index ff4492fb..1c7e5dc0 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep | |||
@@ -142,6 +142,8 @@ lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | |||
142 | lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ | 142 | lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ |
143 | lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ | 143 | lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ |
144 | lj_dispatch.h lj_traceerr.h lj_vm.h | 144 | lj_dispatch.h lj_traceerr.h lj_vm.h |
145 | lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | ||
146 | lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h | ||
145 | lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ | 147 | lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ |
146 | lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \ | 148 | lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \ |
147 | lj_iropt.h lj_vm.h | 149 | lj_iropt.h lj_vm.h |
@@ -153,8 +155,9 @@ lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | |||
153 | lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ | 155 | lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ |
154 | lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h | 156 | lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h |
155 | lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 157 | lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
156 | lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ | 158 | lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ |
157 | lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h lj_target_*.h | 159 | lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ |
160 | lj_target_*.h lj_ctype.h lj_cdata.h | ||
158 | lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 161 | lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
159 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \ | 162 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \ |
160 | lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \ | 163 | lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \ |
@@ -188,12 +191,13 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ | |||
188 | lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \ | 191 | lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \ |
189 | lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h \ | 192 | lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h \ |
190 | lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \ | 193 | lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \ |
191 | lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_mcode.c lj_snap.c \ | 194 | lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \ |
192 | lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c lj_crecord.h \ | 195 | lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ |
193 | lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h lj_asm_*.h \ | 196 | lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ |
194 | lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c \ | 197 | lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ |
195 | lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c \ | 198 | lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ |
196 | lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c lib_init.c | 199 | lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \ |
200 | lib_init.c | ||
197 | luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h | 201 | luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h |
198 | host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ | 202 | host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ |
199 | lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ | 203 | lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ |
diff --git a/src/jit/dump.lua b/src/jit/dump.lua index 3d62c4ea..98933971 100644 --- a/src/jit/dump.lua +++ b/src/jit/dump.lua | |||
@@ -374,10 +374,13 @@ local function dump_snap(tr) | |||
374 | end | 374 | end |
375 | 375 | ||
376 | -- Return a register name or stack slot for a rid/sp location. | 376 | -- Return a register name or stack slot for a rid/sp location. |
377 | local function ridsp_name(ridsp) | 377 | local function ridsp_name(ridsp, ins) |
378 | if not disass then disass = require("jit.dis_"..jit.arch) end | 378 | if not disass then disass = require("jit.dis_"..jit.arch) end |
379 | local rid = band(ridsp, 0xff) | 379 | local rid, slot = band(ridsp, 0xff), shr(ridsp, 8) |
380 | if ridsp > 255 then return format("[%x]", shr(ridsp, 8)*4) end | 380 | if rid == 253 or rid == 254 then |
381 | return slot == 0 and " {sink" or format(" {%04d", ins-slot) | ||
382 | end | ||
383 | if ridsp > 255 then return format("[%x]", slot*4) end | ||
381 | if rid < 128 then return disass.regname(rid) end | 384 | if rid < 128 then return disass.regname(rid) end |
382 | return "" | 385 | return "" |
383 | end | 386 | end |
@@ -458,13 +461,15 @@ local function dump_ir(tr, dumpsnap, dumpreg) | |||
458 | end | 461 | end |
459 | elseif op ~= "NOP " and op ~= "CARG " and | 462 | elseif op ~= "NOP " and op ~= "CARG " and |
460 | (dumpreg or op ~= "RENAME") then | 463 | (dumpreg or op ~= "RENAME") then |
464 | local rid = band(ridsp, 255) | ||
461 | if dumpreg then | 465 | if dumpreg then |
462 | out:write(format("%04d %-5s ", ins, ridsp_name(ridsp))) | 466 | out:write(format("%04d %-6s", ins, ridsp_name(ridsp, ins))) |
463 | else | 467 | else |
464 | out:write(format("%04d ", ins)) | 468 | out:write(format("%04d ", ins)) |
465 | end | 469 | end |
466 | out:write(format("%s%s %s %s ", | 470 | out:write(format("%s%s %s %s ", |
467 | band(ot, 128) == 0 and " " or ">", | 471 | (rid == 254 or rid == 253) and "}" or |
472 | (band(ot, 128) == 0 and " " or ">"), | ||
468 | band(ot, 64) == 0 and " " or "+", | 473 | band(ot, 64) == 0 and " " or "+", |
469 | irtype[t], op)) | 474 | irtype[t], op)) |
470 | local m1, m2 = band(m, 3), band(m, 3*4) | 475 | local m1, m2 = band(m, 3), band(m, 3*4) |
diff --git a/src/lj_asm.c b/src/lj_asm.c index 9bce9292..8ff3eaf7 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -782,19 +782,44 @@ static int asm_snap_canremat(ASMState *as) | |||
782 | static void asm_snap_alloc1(ASMState *as, IRRef ref) | 782 | static void asm_snap_alloc1(ASMState *as, IRRef ref) |
783 | { | 783 | { |
784 | IRIns *ir = IR(ref); | 784 | IRIns *ir = IR(ref); |
785 | if (!ra_used(ir)) { | 785 | if (!(ra_used(ir) || ir->r == RID_SUNK)) { |
786 | RegSet allow = (!LJ_SOFTFP && irt_isnum(ir->t)) ? RSET_FPR : RSET_GPR; | 786 | if (ir->r == RID_SINK) { |
787 | /* Get a weak register if we have a free one or can rematerialize. */ | 787 | ir->r = RID_SUNK; |
788 | if ((as->freeset & allow) || | 788 | #if LJ_HASFFI |
789 | (allow == RSET_FPR && asm_snap_canremat(as))) { | 789 | if (ir->o == IR_CNEWI) { /* Allocate CNEWI value. */ |
790 | Reg r = ra_allocref(as, ref, allow); /* Allocate a register. */ | 790 | asm_snap_alloc1(as, ir->op2); |
791 | if (!irt_isphi(ir->t)) | 791 | if (LJ_32 && (ir+1)->o == IR_HIOP) |
792 | ra_weak(as, r); /* But mark it as weakly referenced. */ | 792 | asm_snap_alloc1(as, (ir+1)->op2); |
793 | checkmclim(as); | 793 | } |
794 | RA_DBGX((as, "snapreg $f $r", ref, ir->r)); | 794 | #endif |
795 | else { /* Allocate stored values for TNEW, TDUP and CNEW. */ | ||
796 | IRIns *irs; | ||
797 | lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW); | ||
798 | for (irs = IR(as->curins); irs > ir; irs--) | ||
799 | if (irs->r == RID_SINK && ir + irs->s == irs) { | ||
800 | lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || | ||
801 | irs->o == IR_FSTORE || irs->o == IR_XSTORE); | ||
802 | asm_snap_alloc1(as, irs->op2); | ||
803 | if (LJ_32 && (irs+1)->o == IR_HIOP) | ||
804 | asm_snap_alloc1(as, (irs+1)->op2); | ||
805 | } | ||
806 | } | ||
807 | } else if (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT) { | ||
808 | asm_snap_alloc1(as, ir->op1); | ||
795 | } else { | 809 | } else { |
796 | ra_spill(as, ir); /* Otherwise force a spill slot. */ | 810 | RegSet allow = (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR; |
797 | RA_DBGX((as, "snapspill $f $s", ref, ir->s)); | 811 | if ((as->freeset & allow) || |
812 | (allow == RSET_FPR && asm_snap_canremat(as))) { | ||
813 | /* Get a weak register if we have a free one or can rematerialize. */ | ||
814 | Reg r = ra_allocref(as, ref, allow); /* Allocate a register. */ | ||
815 | if (!irt_isphi(ir->t)) | ||
816 | ra_weak(as, r); /* But mark it as weakly referenced. */ | ||
817 | checkmclim(as); | ||
818 | RA_DBGX((as, "snapreg $f $r", ref, ir->r)); | ||
819 | } else { | ||
820 | ra_spill(as, ir); /* Otherwise force a spill slot. */ | ||
821 | RA_DBGX((as, "snapspill $f $s", ref, ir->s)); | ||
822 | } | ||
798 | } | 823 | } |
799 | } | 824 | } |
800 | } | 825 | } |
@@ -848,7 +873,7 @@ static void asm_snap_prep(ASMState *as) | |||
848 | { | 873 | { |
849 | if (as->curins < as->snapref) { | 874 | if (as->curins < as->snapref) { |
850 | do { | 875 | do { |
851 | lua_assert(as->snapno != 0); | 876 | if (as->snapno == 0) return; /* Called by sunk stores before snap #0. */ |
852 | as->snapno--; | 877 | as->snapno--; |
853 | as->snapref = as->T->snap[as->snapno].ref; | 878 | as->snapref = as->T->snap[as->snapno].ref; |
854 | } while (as->curins < as->snapref); | 879 | } while (as->curins < as->snapref); |
@@ -1180,6 +1205,8 @@ static void asm_phi(ASMState *as, IRIns *ir) | |||
1180 | RegSet afree = (as->freeset & allow); | 1205 | RegSet afree = (as->freeset & allow); |
1181 | IRIns *irl = IR(ir->op1); | 1206 | IRIns *irl = IR(ir->op1); |
1182 | IRIns *irr = IR(ir->op2); | 1207 | IRIns *irr = IR(ir->op2); |
1208 | if (ir->r == RID_SINK) /* Sink PHI. */ | ||
1209 | return; | ||
1183 | /* Spill slot shuffling is not implemented yet (but rarely needed). */ | 1210 | /* Spill slot shuffling is not implemented yet (but rarely needed). */ |
1184 | if (ra_hasspill(irl->s) || ra_hasspill(irr->s)) | 1211 | if (ra_hasspill(irl->s) || ra_hasspill(irr->s)) |
1185 | lj_trace_err(as->J, LJ_TRERR_NYIPHI); | 1212 | lj_trace_err(as->J, LJ_TRERR_NYIPHI); |
@@ -1494,7 +1521,7 @@ static void asm_tail_link(ASMState *as) | |||
1494 | /* -- Trace setup --------------------------------------------------------- */ | 1521 | /* -- Trace setup --------------------------------------------------------- */ |
1495 | 1522 | ||
1496 | /* Clear reg/sp for all instructions and add register hints. */ | 1523 | /* Clear reg/sp for all instructions and add register hints. */ |
1497 | static void asm_setup_regsp(ASMState *as) | 1524 | static void asm_setup_regsp(ASMState *as, int sink) |
1498 | { | 1525 | { |
1499 | GCtrace *T = as->T; | 1526 | GCtrace *T = as->T; |
1500 | IRRef nins = T->nins; | 1527 | IRRef nins = T->nins; |
@@ -1545,6 +1572,14 @@ static void asm_setup_regsp(ASMState *as) | |||
1545 | inloop = 0; | 1572 | inloop = 0; |
1546 | as->evenspill = SPS_FIRST; | 1573 | as->evenspill = SPS_FIRST; |
1547 | for (lastir = IR(nins); ir < lastir; ir++) { | 1574 | for (lastir = IR(nins); ir < lastir; ir++) { |
1575 | if (sink) { | ||
1576 | if (ir->r == RID_SINK) | ||
1577 | continue; | ||
1578 | if (ir->r == RID_SUNK) { /* Revert after ASM restart. */ | ||
1579 | ir->r = RID_SINK; | ||
1580 | continue; | ||
1581 | } | ||
1582 | } | ||
1548 | switch (ir->o) { | 1583 | switch (ir->o) { |
1549 | case IR_LOOP: | 1584 | case IR_LOOP: |
1550 | inloop = 1; | 1585 | inloop = 1; |
@@ -1716,6 +1751,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1716 | ASMState as_; | 1751 | ASMState as_; |
1717 | ASMState *as = &as_; | 1752 | ASMState *as = &as_; |
1718 | MCode *origtop; | 1753 | MCode *origtop; |
1754 | int sink; | ||
1719 | 1755 | ||
1720 | /* Ensure an initialized instruction beyond the last one for HIOP checks. */ | 1756 | /* Ensure an initialized instruction beyond the last one for HIOP checks. */ |
1721 | J->cur.nins = lj_ir_nextins(J); | 1757 | J->cur.nins = lj_ir_nextins(J); |
@@ -1736,6 +1772,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1736 | as->mcp = as->mctop; | 1772 | as->mcp = as->mctop; |
1737 | as->mclim = as->mcbot + MCLIM_REDZONE; | 1773 | as->mclim = as->mcbot + MCLIM_REDZONE; |
1738 | asm_setup_target(as); | 1774 | asm_setup_target(as); |
1775 | sink = (IR(REF_BASE)->prev == 1); | ||
1739 | 1776 | ||
1740 | do { | 1777 | do { |
1741 | as->mcp = as->mctop; | 1778 | as->mcp = as->mctop; |
@@ -1751,7 +1788,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
1751 | as->gcsteps = 0; | 1788 | as->gcsteps = 0; |
1752 | as->sectref = as->loopref; | 1789 | as->sectref = as->loopref; |
1753 | as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED; | 1790 | as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED; |
1754 | asm_setup_regsp(as); | 1791 | asm_setup_regsp(as, sink); |
1755 | if (!as->loopref) | 1792 | if (!as->loopref) |
1756 | asm_tail_link(as); | 1793 | asm_tail_link(as); |
1757 | 1794 | ||
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index e6ab3573..19250254 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h | |||
@@ -693,6 +693,8 @@ static void asm_newref(ASMState *as, IRIns *ir) | |||
693 | { | 693 | { |
694 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | 694 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; |
695 | IRRef args[3]; | 695 | IRRef args[3]; |
696 | if (ir->r == RID_SINK) /* Sink newref. */ | ||
697 | return; | ||
696 | args[0] = ASMREF_L; /* lua_State *L */ | 698 | args[0] = ASMREF_L; /* lua_State *L */ |
697 | args[1] = ir->op1; /* GCtab *t */ | 699 | args[1] = ir->op1; /* GCtab *t */ |
698 | args[2] = ASMREF_TMP1; /* cTValue *key */ | 700 | args[2] = ASMREF_TMP1; /* cTValue *key */ |
@@ -836,9 +838,13 @@ static void asm_xload(ASMState *as, IRIns *ir) | |||
836 | 838 | ||
837 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | 839 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) |
838 | { | 840 | { |
839 | Reg src = ra_alloc1(as, ir->op2, RSET_GPR); | 841 | if (ir->r == RID_SINK) { /* Sink store. */ |
840 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | 842 | asm_snap_prep(as); |
841 | rset_exclude(RSET_GPR, src), ofs); | 843 | } else { |
844 | Reg src = ra_alloc1(as, ir->op2, RSET_GPR); | ||
845 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | ||
846 | rset_exclude(RSET_GPR, src), ofs); | ||
847 | } | ||
842 | } | 848 | } |
843 | 849 | ||
844 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 850 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
@@ -876,21 +882,25 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) | |||
876 | 882 | ||
877 | static void asm_ahustore(ASMState *as, IRIns *ir) | 883 | static void asm_ahustore(ASMState *as, IRIns *ir) |
878 | { | 884 | { |
879 | RegSet allow = RSET_GPR; | 885 | if (ir->r == RID_SINK) { /* Sink store. */ |
880 | Reg idx, src = RID_NONE, type = RID_NONE; | 886 | asm_snap_prep(as); |
881 | int32_t ofs = 0; | 887 | } else { |
882 | int hiop = ((ir+1)->o == IR_HIOP); | 888 | RegSet allow = RSET_GPR; |
883 | if (!irt_ispri(ir->t)) { | 889 | Reg idx, src = RID_NONE, type = RID_NONE; |
884 | src = ra_alloc1(as, ir->op2, allow); | 890 | int32_t ofs = 0; |
885 | rset_clear(allow, src); | 891 | int hiop = ((ir+1)->o == IR_HIOP); |
892 | if (!irt_ispri(ir->t)) { | ||
893 | src = ra_alloc1(as, ir->op2, allow); | ||
894 | rset_clear(allow, src); | ||
895 | } | ||
896 | if (hiop) | ||
897 | type = ra_alloc1(as, (ir+1)->op2, allow); | ||
898 | else | ||
899 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | ||
900 | idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type)); | ||
901 | if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs); | ||
902 | emit_lso(as, ARMI_STR, type, idx, ofs+4); | ||
886 | } | 903 | } |
887 | if (hiop) | ||
888 | type = ra_alloc1(as, (ir+1)->op2, allow); | ||
889 | else | ||
890 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | ||
891 | idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type)); | ||
892 | if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs); | ||
893 | emit_lso(as, ARMI_STR, type, idx, ofs+4); | ||
894 | } | 904 | } |
895 | 905 | ||
896 | static void asm_sload(ASMState *as, IRIns *ir) | 906 | static void asm_sload(ASMState *as, IRIns *ir) |
@@ -1382,7 +1392,10 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
1382 | asm_fpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); | 1392 | asm_fpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); |
1383 | return; | 1393 | return; |
1384 | } else if ((ir-1)->o == IR_XSTORE) { | 1394 | } else if ((ir-1)->o == IR_XSTORE) { |
1385 | asm_xstore(as, ir, 4); | 1395 | if ((ir-1)->r == RID_SINK) |
1396 | asm_snap_prep(as); | ||
1397 | else | ||
1398 | asm_xstore(as, ir, 4); | ||
1386 | return; | 1399 | return; |
1387 | } | 1400 | } |
1388 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ | 1401 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ |
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index b42f9f9a..def3eb2a 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h | |||
@@ -769,14 +769,18 @@ nolo: | |||
769 | 769 | ||
770 | static void asm_newref(ASMState *as, IRIns *ir) | 770 | static void asm_newref(ASMState *as, IRIns *ir) |
771 | { | 771 | { |
772 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | 772 | if (ir->r == RID_SINK) { /* Sink newref. */ |
773 | IRRef args[3]; | 773 | return; |
774 | args[0] = ASMREF_L; /* lua_State *L */ | 774 | } else { |
775 | args[1] = ir->op1; /* GCtab *t */ | 775 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; |
776 | args[2] = ASMREF_TMP1; /* cTValue *key */ | 776 | IRRef args[3]; |
777 | asm_setupresult(as, ir, ci); /* TValue * */ | 777 | args[0] = ASMREF_L; /* lua_State *L */ |
778 | asm_gencall(as, ci, args); | 778 | args[1] = ir->op1; /* GCtab *t */ |
779 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); | 779 | args[2] = ASMREF_TMP1; /* cTValue *key */ |
780 | asm_setupresult(as, ir, ci); /* TValue * */ | ||
781 | asm_gencall(as, ci, args); | ||
782 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); | ||
783 | } | ||
780 | } | 784 | } |
781 | 785 | ||
782 | static void asm_uref(ASMState *as, IRIns *ir) | 786 | static void asm_uref(ASMState *as, IRIns *ir) |
@@ -912,9 +916,14 @@ static void asm_xload(ASMState *as, IRIns *ir) | |||
912 | 916 | ||
913 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | 917 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) |
914 | { | 918 | { |
915 | Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | 919 | if (ir->r == RID_SINK) { /* Sink store. */ |
916 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | 920 | asm_snap_prep(as); |
917 | rset_exclude(RSET_GPR, src), ofs); | 921 | return; |
922 | } else { | ||
923 | Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | ||
924 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | ||
925 | rset_exclude(RSET_GPR, src), ofs); | ||
926 | } | ||
918 | } | 927 | } |
919 | 928 | ||
920 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 929 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
@@ -947,6 +956,10 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
947 | RegSet allow = RSET_GPR; | 956 | RegSet allow = RSET_GPR; |
948 | Reg idx, src = RID_NONE, type = RID_NONE; | 957 | Reg idx, src = RID_NONE, type = RID_NONE; |
949 | int32_t ofs = 0; | 958 | int32_t ofs = 0; |
959 | if (ir->r == RID_SINK) { /* Sink store. */ | ||
960 | asm_snap_prep(as); | ||
961 | return; | ||
962 | } | ||
950 | if (irt_isnum(ir->t)) { | 963 | if (irt_isnum(ir->t)) { |
951 | src = ra_alloc1(as, ir->op2, RSET_FPR); | 964 | src = ra_alloc1(as, ir->op2, RSET_FPR); |
952 | } else { | 965 | } else { |
@@ -1561,8 +1574,12 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
1561 | return; | 1574 | return; |
1562 | } else if ((ir-1)->o == IR_XSTORE) { | 1575 | } else if ((ir-1)->o == IR_XSTORE) { |
1563 | as->curins--; /* Handle both stores here. */ | 1576 | as->curins--; /* Handle both stores here. */ |
1564 | asm_xstore(as, ir, LJ_LE ? 4 : 0); | 1577 | if ((ir-1)->r == RID_SINK) { |
1565 | asm_xstore(as, ir-1, LJ_LE ? 0 : 4); | 1578 | asm_snap_prep(as); |
1579 | } else { | ||
1580 | asm_xstore(as, ir, LJ_LE ? 4 : 0); | ||
1581 | asm_xstore(as, ir-1, LJ_LE ? 0 : 4); | ||
1582 | } | ||
1566 | return; | 1583 | return; |
1567 | } | 1584 | } |
1568 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ | 1585 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ |
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 5d538fc8..142ef212 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h | |||
@@ -773,6 +773,8 @@ static void asm_newref(ASMState *as, IRIns *ir) | |||
773 | { | 773 | { |
774 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | 774 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; |
775 | IRRef args[3]; | 775 | IRRef args[3]; |
776 | if (ir->r == RID_SINK) /* Sink newref. */ | ||
777 | return; | ||
776 | args[0] = ASMREF_L; /* lua_State *L */ | 778 | args[0] = ASMREF_L; /* lua_State *L */ |
777 | args[1] = ir->op1; /* GCtab *t */ | 779 | args[1] = ir->op1; /* GCtab *t */ |
778 | args[2] = ASMREF_TMP1; /* cTValue *key */ | 780 | args[2] = ASMREF_TMP1; /* cTValue *key */ |
@@ -892,12 +894,16 @@ static void asm_fload(ASMState *as, IRIns *ir) | |||
892 | 894 | ||
893 | static void asm_fstore(ASMState *as, IRIns *ir) | 895 | static void asm_fstore(ASMState *as, IRIns *ir) |
894 | { | 896 | { |
895 | Reg src = ra_alloc1(as, ir->op2, RSET_GPR); | 897 | if (ir->r == RID_SINK) { /* Sink store. */ |
896 | IRIns *irf = IR(ir->op1); | 898 | asm_snap_prep(as); |
897 | Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); | 899 | } else { |
898 | int32_t ofs = field_ofs[irf->op2]; | 900 | Reg src = ra_alloc1(as, ir->op2, RSET_GPR); |
899 | PPCIns pi = asm_fxstoreins(ir); | 901 | IRIns *irf = IR(ir->op1); |
900 | emit_tai(as, pi, src, idx, ofs); | 902 | Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); |
903 | int32_t ofs = field_ofs[irf->op2]; | ||
904 | PPCIns pi = asm_fxstoreins(ir); | ||
905 | emit_tai(as, pi, src, idx, ofs); | ||
906 | } | ||
901 | } | 907 | } |
902 | 908 | ||
903 | static void asm_xload(ASMState *as, IRIns *ir) | 909 | static void asm_xload(ASMState *as, IRIns *ir) |
@@ -912,6 +918,10 @@ static void asm_xload(ASMState *as, IRIns *ir) | |||
912 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | 918 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) |
913 | { | 919 | { |
914 | IRIns *irb; | 920 | IRIns *irb; |
921 | if (ir->r == RID_SINK) { /* Sink store. */ | ||
922 | asm_snap_prep(as); | ||
923 | return; | ||
924 | } | ||
915 | if (ofs == 0 && mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP && | 925 | if (ofs == 0 && mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP && |
916 | ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) { | 926 | ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) { |
917 | /* Fuse BSWAP with XSTORE to stwbrx. */ | 927 | /* Fuse BSWAP with XSTORE to stwbrx. */ |
@@ -968,6 +978,10 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
968 | RegSet allow = RSET_GPR; | 978 | RegSet allow = RSET_GPR; |
969 | Reg idx, src = RID_NONE, type = RID_NONE; | 979 | Reg idx, src = RID_NONE, type = RID_NONE; |
970 | int32_t ofs = AHUREF_LSX; | 980 | int32_t ofs = AHUREF_LSX; |
981 | if (ir->r == RID_SINK) { /* Sink store. */ | ||
982 | asm_snap_prep(as); | ||
983 | return; | ||
984 | } | ||
971 | if (irt_isnum(ir->t)) { | 985 | if (irt_isnum(ir->t)) { |
972 | src = ra_alloc1(as, ir->op2, RSET_FPR); | 986 | src = ra_alloc1(as, ir->op2, RSET_FPR); |
973 | } else { | 987 | } else { |
@@ -1747,8 +1761,12 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
1747 | return; | 1761 | return; |
1748 | } else if ((ir-1)->o == IR_XSTORE) { | 1762 | } else if ((ir-1)->o == IR_XSTORE) { |
1749 | as->curins--; /* Handle both stores here. */ | 1763 | as->curins--; /* Handle both stores here. */ |
1750 | asm_xstore(as, ir, 0); | 1764 | if ((ir-1)->r == RID_SINK) { |
1751 | asm_xstore(as, ir-1, 4); | 1765 | asm_snap_prep(as); |
1766 | } else { | ||
1767 | asm_xstore(as, ir, 0); | ||
1768 | asm_xstore(as, ir-1, 4); | ||
1769 | } | ||
1752 | return; | 1770 | return; |
1753 | } | 1771 | } |
1754 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ | 1772 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ |
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 4537e1d5..ae14b3b6 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h | |||
@@ -1155,6 +1155,8 @@ static void asm_newref(ASMState *as, IRIns *ir) | |||
1155 | IRRef args[3]; | 1155 | IRRef args[3]; |
1156 | IRIns *irkey; | 1156 | IRIns *irkey; |
1157 | Reg tmp; | 1157 | Reg tmp; |
1158 | if (ir->r == RID_SINK) /* Sink newref. */ | ||
1159 | return; | ||
1158 | args[0] = ASMREF_L; /* lua_State *L */ | 1160 | args[0] = ASMREF_L; /* lua_State *L */ |
1159 | args[1] = ir->op1; /* GCtab *t */ | 1161 | args[1] = ir->op1; /* GCtab *t */ |
1160 | args[2] = ASMREF_TMP1; /* cTValue *key */ | 1162 | args[2] = ASMREF_TMP1; /* cTValue *key */ |
@@ -1259,6 +1261,10 @@ static void asm_fxstore(ASMState *as, IRIns *ir) | |||
1259 | RegSet allow = RSET_GPR; | 1261 | RegSet allow = RSET_GPR; |
1260 | Reg src = RID_NONE, osrc = RID_NONE; | 1262 | Reg src = RID_NONE, osrc = RID_NONE; |
1261 | int32_t k = 0; | 1263 | int32_t k = 0; |
1264 | if (ir->r == RID_SINK) { /* Sink store. */ | ||
1265 | asm_snap_prep(as); | ||
1266 | return; | ||
1267 | } | ||
1262 | /* The IRT_I16/IRT_U16 stores should never be simplified for constant | 1268 | /* The IRT_I16/IRT_U16 stores should never be simplified for constant |
1263 | ** values since mov word [mem], imm16 has a length-changing prefix. | 1269 | ** values since mov word [mem], imm16 has a length-changing prefix. |
1264 | */ | 1270 | */ |
@@ -1372,6 +1378,10 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) | |||
1372 | 1378 | ||
1373 | static void asm_ahustore(ASMState *as, IRIns *ir) | 1379 | static void asm_ahustore(ASMState *as, IRIns *ir) |
1374 | { | 1380 | { |
1381 | if (ir->r == RID_SINK) { /* Sink store. */ | ||
1382 | asm_snap_prep(as); | ||
1383 | return; | ||
1384 | } | ||
1375 | if (irt_isnum(ir->t)) { | 1385 | if (irt_isnum(ir->t)) { |
1376 | Reg src = ra_alloc1(as, ir->op2, RSET_FPR); | 1386 | Reg src = ra_alloc1(as, ir->op2, RSET_FPR); |
1377 | asm_fuseahuref(as, ir->op1, RSET_GPR); | 1387 | asm_fuseahuref(as, ir->op1, RSET_GPR); |
@@ -2251,7 +2261,10 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
2251 | asm_comp_int64(as, ir); | 2261 | asm_comp_int64(as, ir); |
2252 | return; | 2262 | return; |
2253 | } else if ((ir-1)->o == IR_XSTORE) { | 2263 | } else if ((ir-1)->o == IR_XSTORE) { |
2254 | asm_fxstore(as, ir); | 2264 | if ((ir-1)->r == RID_SINK) |
2265 | asm_snap_prep(as); | ||
2266 | else | ||
2267 | asm_fxstore(as, ir); | ||
2255 | return; | 2268 | return; |
2256 | } | 2269 | } |
2257 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ | 2270 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ |
diff --git a/src/lj_iropt.h b/src/lj_iropt.h index 81d522e8..a17e2065 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h | |||
@@ -154,6 +154,7 @@ LJ_FUNC void lj_opt_split(jit_State *J); | |||
154 | #else | 154 | #else |
155 | #define lj_opt_split(J) UNUSED(J) | 155 | #define lj_opt_split(J) UNUSED(J) |
156 | #endif | 156 | #endif |
157 | LJ_FUNC void lj_opt_sink(jit_State *J); | ||
157 | 158 | ||
158 | #endif | 159 | #endif |
159 | 160 | ||
diff --git a/src/lj_jit.h b/src/lj_jit.h index 28cdd17a..517b3264 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h | |||
@@ -63,19 +63,20 @@ | |||
63 | #define JIT_F_OPT_NARROW 0x00200000 | 63 | #define JIT_F_OPT_NARROW 0x00200000 |
64 | #define JIT_F_OPT_LOOP 0x00400000 | 64 | #define JIT_F_OPT_LOOP 0x00400000 |
65 | #define JIT_F_OPT_ABC 0x00800000 | 65 | #define JIT_F_OPT_ABC 0x00800000 |
66 | #define JIT_F_OPT_FUSE 0x01000000 | 66 | #define JIT_F_OPT_SINK 0x01000000 |
67 | #define JIT_F_OPT_FUSE 0x02000000 | ||
67 | 68 | ||
68 | /* Optimizations names for -O. Must match the order above. */ | 69 | /* Optimizations names for -O. Must match the order above. */ |
69 | #define JIT_F_OPT_FIRST JIT_F_OPT_FOLD | 70 | #define JIT_F_OPT_FIRST JIT_F_OPT_FOLD |
70 | #define JIT_F_OPTSTRING \ | 71 | #define JIT_F_OPTSTRING \ |
71 | "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4fuse" | 72 | "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse" |
72 | 73 | ||
73 | /* Optimization levels set a fixed combination of flags. */ | 74 | /* Optimization levels set a fixed combination of flags. */ |
74 | #define JIT_F_OPT_0 0 | 75 | #define JIT_F_OPT_0 0 |
75 | #define JIT_F_OPT_1 (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE) | 76 | #define JIT_F_OPT_1 (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE) |
76 | #define JIT_F_OPT_2 (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP) | 77 | #define JIT_F_OPT_2 (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP) |
77 | #define JIT_F_OPT_3 \ | 78 | #define JIT_F_OPT_3 (JIT_F_OPT_2|\ |
78 | (JIT_F_OPT_2|JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_FUSE) | 79 | JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE) |
79 | #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 | 80 | #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 |
80 | 81 | ||
81 | #if LJ_TARGET_WINDOWS || LJ_64 | 82 | #if LJ_TARGET_WINDOWS || LJ_64 |
diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c new file mode 100644 index 00000000..80ab5b6e --- /dev/null +++ b/src/lj_opt_sink.c | |||
@@ -0,0 +1,244 @@ | |||
1 | /* | ||
2 | ** SINK: Allocation Sinking and Store Sinking. | ||
3 | ** Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_opt_sink_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #if LJ_HASJIT | ||
12 | |||
13 | #include "lj_ir.h" | ||
14 | #include "lj_jit.h" | ||
15 | #include "lj_iropt.h" | ||
16 | #include "lj_target.h" | ||
17 | |||
18 | /* Some local macros to save typing. Undef'd at the end. */ | ||
19 | #define IR(ref) (&J->cur.ir[(ref)]) | ||
20 | |||
21 | /* Check whether the store ref points to an eligible allocation. */ | ||
22 | static IRIns *sink_checkalloc(jit_State *J, IRIns *irs) | ||
23 | { | ||
24 | IRIns *ir = IR(irs->op1); | ||
25 | if (!irref_isk(ir->op2)) | ||
26 | return NULL; /* Non-constant key. */ | ||
27 | if (ir->o == IR_HREFK || ir->o == IR_AREF) | ||
28 | ir = IR(ir->op1); | ||
29 | else if (!(ir->o == IR_HREF || ir->o == IR_NEWREF || | ||
30 | ir->o == IR_FREF || ir->o == IR_ADD)) | ||
31 | return NULL; /* Unhandled reference type (for XSTORE). */ | ||
32 | ir = IR(ir->op1); | ||
33 | if (!(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW)) | ||
34 | return NULL; /* Not an allocation. */ | ||
35 | if (ir + 255 < irs) | ||
36 | return NULL; /* Out of range. */ | ||
37 | return ir; /* Return allocation. */ | ||
38 | } | ||
39 | |||
40 | /* Recursively check whether a value depends on a PHI. */ | ||
41 | static int sink_phidep(jit_State *J, IRRef ref) | ||
42 | { | ||
43 | IRIns *ir = IR(ref); | ||
44 | if (irt_isphi(ir->t)) return 1; | ||
45 | if (ir->op1 >= REF_FIRST && sink_phidep(J, ir->op1)) return 1; | ||
46 | if (ir->op2 >= REF_FIRST && sink_phidep(J, ir->op2)) return 1; | ||
47 | return 0; | ||
48 | } | ||
49 | |||
50 | /* Check whether a value is a sinkable PHI or a non-PHI. */ | ||
51 | static int sink_checkphi(jit_State *J, IRIns *ira, IRRef ref) | ||
52 | { | ||
53 | if (ref >= REF_FIRST) { | ||
54 | IRIns *ir = IR(ref); | ||
55 | if (irt_isphi(ir->t) || (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT && | ||
56 | irt_isphi(IR(ir->op1)->t))) { | ||
57 | ira->prev++; | ||
58 | return 1; /* Sinkable PHI. */ | ||
59 | } | ||
60 | return !sink_phidep(J, ref); /* Must be a non-PHI then. */ | ||
61 | } | ||
62 | return 1; /* Constant (non-PHI). */ | ||
63 | } | ||
64 | |||
65 | /* Mark non-sinkable allocations using single-pass backward propagation. | |
66 | ** | |
   | ** Walks the IR of the current trace backwards, starting at the last | |
   | ** instruction and stopping when IR_BASE is reached. Marks are set on the | |
   | ** operands described below. sink_sweep_ins() later sinks only allocations | |
   | ** that were left unmarked. | |
   | ** | |
67 | ** Roots for the marking process are: | |
68 | ** - Some PHIs or snapshots (see below). | |
69 | ** - Non-PHI, non-constant values stored to PHI allocations. | |
70 | ** - All guards. | |
71 | ** - Any remaining loads not eliminated by store-to-load forwarding. | |
72 | ** - Stores with non-constant keys. | |
73 | ** - All stored values. | |
74 | */ | |
75 | static void sink_mark_ins(jit_State *J) | |
76 | { | |
77 | IRIns *ir, *irlast = IR(J->cur.nins-1); /* Start at the highest-numbered instruction. */ | |
78 | for (ir = irlast ; ; ir--) { /* No loop condition: the IR_BASE case ends the walk. */ | |
79 | switch (ir->o) { | |
80 | case IR_BASE: | |
81 | return; /* Finished. */ | |
82 | case IR_CALLL: /* IRCALL_lj_tab_len */ | |
83 | case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: | |
84 | irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */ | |
85 | break; | |
86 | case IR_FLOAD: /* Field load: only propagates if already marked or if it reads the metatable field. */ | |
87 | if (irt_ismarked(ir->t) || ir->op2 == IRFL_TAB_META) | |
88 | irt_setmark(IR(ir->op1)->t); /* Mark table for remaining loads. */ | |
89 | break; | |
90 | case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: { | |
91 | IRIns *ira = sink_checkalloc(J, ir); /* NULL if the store does not target an eligible allocation. */ | |
92 | if (!ira || (irt_isphi(ira->t) && !sink_checkphi(J, ira, ir->op2))) | |
93 | irt_setmark(IR(ir->op1)->t); /* Mark ineligible ref. */ | |
94 | irt_setmark(IR(ir->op2)->t); /* Mark stored value. */ | |
95 | break; | |
96 | } | |
97 | #if LJ_HASFFI | |
98 | case IR_CNEWI: /* Allocation with an initial value in op2; checked like a store to a PHI allocation. */ | |
99 | if (irt_isphi(ir->t) && | |
100 | (!sink_checkphi(J, ir, ir->op2) || | |
101 | (LJ_32 && ir+1 < irlast && (ir+1)->o == IR_HIOP && /* 32 bit: a directly following HIOP carries a second value operand. */ | |
102 | !sink_checkphi(J, ir, (ir+1)->op2)))) | |
103 | irt_setmark(ir->t); /* Mark ineligible allocation. */ | |
104 | /* fallthrough */ | |
105 | #endif | |
106 | case IR_USTORE: | |
107 | irt_setmark(IR(ir->op2)->t); /* Mark stored value. */ | |
108 | break; | |
109 | #if LJ_HASFFI | |
110 | case IR_CALLXS: | |
111 | #endif | |
112 | case IR_CALLS: | |
113 | irt_setmark(IR(ir->op1)->t); /* Mark (potentially) stored values. */ | |
114 | break; | |
115 | case IR_PHI: { | |
116 | IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); | |
117 | irl->prev = irr->prev = 0; /* Clear PHI value counts. */ | |
118 | if (irl->o == irr->o && | |
119 | (irl->o == IR_TNEW || irl->o == IR_TDUP || | |
120 | (LJ_HASFFI && (irl->o == IR_CNEW || irl->o == IR_CNEWI)))) | |
121 | break; /* Same kind of allocation on both sides: leave both unmarked for now. */ | |
122 | irt_setmark(irl->t); /* Any other PHI: mark both operands. */ | |
123 | irt_setmark(irr->t); | |
124 | break; | |
125 | } | |
126 | default: | |
127 | if (irt_ismarked(ir->t) || irt_isguard(ir->t)) { /* Propagate mark. */ | |
128 | if (ir->op1 >= REF_FIRST) irt_setmark(IR(ir->op1)->t); /* Operands below REF_FIRST are skipped. */ | |
129 | if (ir->op2 >= REF_FIRST) irt_setmark(IR(ir->op2)->t); | |
130 | } | |
131 | break; | |
132 | } | |
133 | } | |
134 | } | |
135 | |||
136 | /* Mark all instructions referenced by a snapshot. */ | ||
137 | static void sink_mark_snap(jit_State *J, SnapShot *snap) | ||
138 | { | ||
139 | SnapEntry *map = &J->cur.snapmap[snap->mapofs]; | ||
140 | MSize n, nent = snap->nent; | ||
141 | for (n = 0; n < nent; n++) { | ||
142 | IRRef ref = snap_ref(map[n]); | ||
143 | if (!irref_isk(ref)) | ||
144 | irt_setmark(IR(ref)->t); | ||
145 | } | ||
146 | } | ||
147 | |||
148 | /* Iteratively remark PHI refs with differing marks or PHI value counts. */ | ||
149 | static void sink_remark_phi(jit_State *J) | ||
150 | { | ||
151 | IRIns *ir; | ||
152 | int remark; | ||
153 | do { | ||
154 | remark = 0; | ||
155 | for (ir = IR(J->cur.nins-1); ir->o == IR_PHI; ir--) { | ||
156 | IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); | ||
157 | if (((irl->t.irt ^ irr->t.irt) & IRT_MARK)) | ||
158 | remark = 1; | ||
159 | else if (irl->prev == irr->prev) | ||
160 | continue; | ||
161 | irt_setmark(IR(ir->op1)->t); | ||
162 | irt_setmark(IR(ir->op2)->t); | ||
163 | } | ||
164 | } while (remark); | ||
165 | } | ||
166 | |||
167 | /* Sweep instructions and mark sunken allocations and stores. */ | ||
168 | static void sink_sweep_ins(jit_State *J) | ||
169 | { | ||
170 | IRIns *ir, *irfirst = IR(J->cur.nk); | ||
171 | for (ir = IR(J->cur.nins-1) ; ir >= irfirst; ir--) { | ||
172 | switch (ir->o) { | ||
173 | case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: { | ||
174 | IRIns *ira = sink_checkalloc(J, ir); | ||
175 | if (ira && !irt_ismarked(ira->t)) | ||
176 | ir->prev = REGSP(RID_SINK, (int)(ir - ira)); | ||
177 | else | ||
178 | ir->prev = REGSP_INIT; | ||
179 | break; | ||
180 | } | ||
181 | case IR_NEWREF: | ||
182 | if (!irt_ismarked(ir->t)) { | ||
183 | ir->prev = REGSP(RID_SINK, 0); | ||
184 | } else { | ||
185 | irt_clearmark(ir->t); | ||
186 | ir->prev = REGSP_INIT; | ||
187 | } | ||
188 | break; | ||
189 | #if LJ_HASFFI | ||
190 | case IR_CNEW: case IR_CNEWI: | ||
191 | #endif | ||
192 | case IR_TNEW: case IR_TDUP: | ||
193 | if (!irt_ismarked(ir->t)) { | ||
194 | ir->t.irt &= ~IRT_GUARD; | ||
195 | ir->prev = REGSP(RID_SINK, 0); | ||
196 | } else { | ||
197 | irt_clearmark(ir->t); | ||
198 | ir->prev = REGSP_INIT; | ||
199 | } | ||
200 | break; | ||
201 | case IR_PHI: { | ||
202 | IRIns *ira = IR(ir->op2); | ||
203 | if (!irt_ismarked(ira->t) && | ||
204 | (ira->o == IR_TNEW || ira->o == IR_TDUP || | ||
205 | (LJ_HASFFI && (ira->o == IR_CNEW || ira->o == IR_CNEWI)))) { | ||
206 | ir->prev = REGSP(RID_SINK, 0); | ||
207 | } else { | ||
208 | ir->prev = REGSP_INIT; | ||
209 | } | ||
210 | break; | ||
211 | } | ||
212 | default: | ||
213 | irt_clearmark(ir->t); | ||
214 | ir->prev = REGSP_INIT; | ||
215 | break; | ||
216 | } | ||
217 | } | ||
218 | IR(REF_BASE)->prev = 1; /* Signal SINK flags to assembler. */ | ||
219 | } | ||
220 | |||
221 | /* Allocation sinking and store sinking. | ||
222 | ** | ||
223 | ** 1. Mark all non-sinkable allocations. | ||
224 | ** 2. Then sink all remaining allocations and the related stores. | ||
225 | */ | ||
226 | void lj_opt_sink(jit_State *J) | ||
227 | { | ||
228 | const uint32_t need = (JIT_F_OPT_SINK|JIT_F_OPT_FWD| | ||
229 | JIT_F_OPT_DCE|JIT_F_OPT_CSE|JIT_F_OPT_FOLD); | ||
230 | if ((J->flags & need) == need && | ||
231 | (J->chain[IR_TNEW] || J->chain[IR_TDUP] || | ||
232 | (LJ_HASFFI && (J->chain[IR_CNEW] || J->chain[IR_CNEWI])))) { | ||
233 | if (!J->loopref) | ||
234 | sink_mark_snap(J, &J->cur.snap[J->cur.nsnap-1]); | ||
235 | sink_mark_ins(J); | ||
236 | if (J->loopref) | ||
237 | sink_remark_phi(J); | ||
238 | sink_sweep_ins(J); | ||
239 | } | ||
240 | } | ||
241 | |||
242 | #undef IR | ||
243 | |||
244 | #endif | ||
diff --git a/src/lj_snap.c b/src/lj_snap.c index 33edc8a6..1e6f10d0 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #if LJ_HASJIT | 11 | #if LJ_HASJIT |
12 | 12 | ||
13 | #include "lj_gc.h" | 13 | #include "lj_gc.h" |
14 | #include "lj_tab.h" | ||
14 | #include "lj_state.h" | 15 | #include "lj_state.h" |
15 | #include "lj_frame.h" | 16 | #include "lj_frame.h" |
16 | #include "lj_bc.h" | 17 | #include "lj_bc.h" |
@@ -20,10 +21,17 @@ | |||
20 | #include "lj_trace.h" | 21 | #include "lj_trace.h" |
21 | #include "lj_snap.h" | 22 | #include "lj_snap.h" |
22 | #include "lj_target.h" | 23 | #include "lj_target.h" |
24 | #if LJ_HASFFI | ||
25 | #include "lj_ctype.h" | ||
26 | #include "lj_cdata.h" | ||
27 | #endif | ||
23 | 28 | ||
24 | /* Some local macros to save typing. Undef'd at the end. */ | 29 | /* Some local macros to save typing. Undef'd at the end. */ |
25 | #define IR(ref) (&J->cur.ir[(ref)]) | 30 | #define IR(ref) (&J->cur.ir[(ref)]) |
26 | 31 | ||
32 | /* Pass IR on to next optimization in chain (FOLD). */ | ||
33 | #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) | ||
34 | |||
27 | /* Emit raw IR without passing through optimizations. */ | 35 | /* Emit raw IR without passing through optimizations. */ |
28 | #define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J)) | 36 | #define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J)) |
29 | 37 | ||
@@ -370,6 +378,31 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir) | |||
370 | } | 378 | } |
371 | } | 379 | } |
372 | 380 | ||
381 | /* De-duplicate parent reference. */ | ||
382 | static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref) | ||
383 | { | ||
384 | MSize j; | ||
385 | for (j = 0; j < nmax; j++) | ||
386 | if (snap_ref(map[j]) == ref) | ||
387 | return J->slot[snap_slot(map[j])]; | ||
388 | return 0; | ||
389 | } | ||
390 | |||
391 | /* Emit parent reference with de-duplication. */ | ||
392 | static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax, | ||
393 | BloomFilter seen, IRRef ref) | ||
394 | { | ||
395 | IRIns *ir = &T->ir[ref]; | ||
396 | TRef tr; | ||
397 | if (irref_isk(ref)) | ||
398 | tr = snap_replay_const(J, ir); | ||
399 | else if (!regsp_used(ir->prev)) | ||
400 | tr = 0; | ||
401 | else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0) | ||
402 | tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0); | ||
403 | return tr; | ||
404 | } | ||
405 | |||
373 | /* Replay snapshot state to setup side trace. */ | 406 | /* Replay snapshot state to setup side trace. */ |
374 | void lj_snap_replay(jit_State *J, GCtrace *T) | 407 | void lj_snap_replay(jit_State *J, GCtrace *T) |
375 | { | 408 | { |
@@ -377,6 +410,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T) | |||
377 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 410 | SnapEntry *map = &T->snapmap[snap->mapofs]; |
378 | MSize n, nent = snap->nent; | 411 | MSize n, nent = snap->nent; |
379 | BloomFilter seen = 0; | 412 | BloomFilter seen = 0; |
413 | int pass23 = 0; | ||
380 | J->framedepth = 0; | 414 | J->framedepth = 0; |
381 | /* Emit IR for slots inherited from parent snapshot. */ | 415 | /* Emit IR for slots inherited from parent snapshot. */ |
382 | for (n = 0; n < nent; n++) { | 416 | for (n = 0; n < nent; n++) { |
@@ -386,21 +420,18 @@ void lj_snap_replay(jit_State *J, GCtrace *T) | |||
386 | IRIns *ir = &T->ir[ref]; | 420 | IRIns *ir = &T->ir[ref]; |
387 | TRef tr; | 421 | TRef tr; |
388 | /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */ | 422 | /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */ |
389 | if (bloomtest(seen, ref)) { | 423 | if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0) |
390 | MSize j; | 424 | goto setslot; |
391 | for (j = 0; j < n; j++) | ||
392 | if (snap_ref(map[j]) == ref) { | ||
393 | tr = J->slot[snap_slot(map[j])]; | ||
394 | goto setslot; | ||
395 | } | ||
396 | } | ||
397 | bloomset(seen, ref); | 425 | bloomset(seen, ref); |
398 | if (irref_isk(ref)) { | 426 | if (irref_isk(ref)) { |
399 | tr = snap_replay_const(J, ir); | 427 | tr = snap_replay_const(J, ir); |
428 | } else if (!regsp_used(ir->prev)) { | ||
429 | pass23 = 1; | ||
430 | lua_assert(s != 0); | ||
431 | tr = s; | ||
400 | } else { | 432 | } else { |
401 | IRType t = irt_type(ir->t); | 433 | IRType t = irt_type(ir->t); |
402 | uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; | 434 | uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; |
403 | lua_assert(regsp_used(ir->prev)); | ||
404 | if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; | 435 | if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; |
405 | if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); | 436 | if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); |
406 | tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); | 437 | tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); |
@@ -411,13 +442,126 @@ void lj_snap_replay(jit_State *J, GCtrace *T) | |||
411 | if ((sn & SNAP_FRAME)) | 442 | if ((sn & SNAP_FRAME)) |
412 | J->baseslot = s+1; | 443 | J->baseslot = s+1; |
413 | } | 444 | } |
445 | if (pass23) { | ||
446 | IRIns *irlast = &T->ir[(snap+1)->ref]; | ||
447 | lua_assert(J->exitno+1 < T->nsnap); | ||
448 | pass23 = 0; | ||
449 | /* Emit dependent PVALs. */ | ||
450 | for (n = 0; n < nent; n++) { | ||
451 | SnapEntry sn = map[n]; | ||
452 | IRRef refp = snap_ref(sn); | ||
453 | IRIns *ir = &T->ir[refp]; | ||
454 | if (regsp_reg(ir->r) == RID_SUNK) { | ||
455 | if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue; | ||
456 | pass23 = 1; | ||
457 | lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || | ||
458 | ir->o == IR_CNEW || ir->o == IR_CNEWI); | ||
459 | if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1); | ||
460 | if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2); | ||
461 | if (LJ_HASFFI && ir->o == IR_CNEWI) { | ||
462 | if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) | ||
463 | snap_pref(J, T, map, nent, seen, (ir+1)->op2); | ||
464 | } else { | ||
465 | IRIns *irs; | ||
466 | for (irs = ir+1; irs < irlast; irs++) | ||
467 | if (irs->r == RID_SINK && ir + irs->s == irs) { | ||
468 | if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) | ||
469 | snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); | ||
470 | else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && | ||
471 | irs+1 < irlast && (irs+1)->o == IR_HIOP) | ||
472 | snap_pref(J, T, map, nent, seen, (irs+1)->op2); | ||
473 | } | ||
474 | } | ||
475 | } else if (!irref_isk(refp) && !regsp_used(ir->prev)) { | ||
476 | lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); | ||
477 | J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1); | ||
478 | } | ||
479 | } | ||
480 | /* Replay sunk instructions. */ | ||
481 | for (n = 0; pass23 && n < nent; n++) { | ||
482 | SnapEntry sn = map[n]; | ||
483 | IRRef refp = snap_ref(sn); | ||
484 | IRIns *ir = &T->ir[refp]; | ||
485 | if (regsp_reg(ir->r) == RID_SUNK) { | ||
486 | TRef op1, op2; | ||
487 | if (J->slot[snap_slot(sn)] != snap_slot(sn)) { /* De-dup allocs. */ | ||
488 | J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]]; | ||
489 | continue; | ||
490 | } | ||
491 | op1 = ir->op1; | ||
492 | if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1); | ||
493 | op2 = ir->op2; | ||
494 | if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2); | ||
495 | if (LJ_HASFFI && ir->o == IR_CNEWI) { | ||
496 | if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) { | ||
497 | lj_needsplit(J); /* Emit joining HIOP. */ | ||
498 | op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2, | ||
499 | snap_pref(J, T, map, nent, seen, (ir+1)->op2)); | ||
500 | } | ||
501 | J->slot[snap_slot(sn)] = emitir(ir->ot, op1, op2); | ||
502 | } else { | ||
503 | IRIns *irs; | ||
504 | TRef tr = emitir(ir->ot, op1, op2); | ||
505 | J->slot[snap_slot(sn)] = tr; | ||
506 | for (irs = ir+1; irs < irlast; irs++) | ||
507 | if (irs->r == RID_SINK && ir + irs->s == irs) { | ||
508 | IRIns *irr = &T->ir[irs->op1]; | ||
509 | TRef val, key = irr->op2, tmp = tr; | ||
510 | if (irr->o != IR_FREF) { | ||
511 | IRIns *irk = &T->ir[key]; | ||
512 | if (irr->o == IR_HREFK) | ||
513 | key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]), | ||
514 | irk->op2); | ||
515 | else | ||
516 | key = snap_replay_const(J, irk); | ||
517 | if (irr->o == IR_HREFK || irr->o == IR_AREF) { | ||
518 | IRIns *irf = &T->ir[irr->op1]; | ||
519 | tmp = emitir(irf->ot, tmp, irf->op2); | ||
520 | } | ||
521 | } | ||
522 | tmp = emitir(irr->ot, tmp, key); | ||
523 | val = snap_pref(J, T, map, nent, seen, irs->op2); | ||
524 | if (val == 0) { | ||
525 | IRIns *irc = &T->ir[irs->op2]; | ||
526 | lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT); | ||
527 | val = snap_pref(J, T, map, nent, seen, irc->op1); | ||
528 | val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); | ||
529 | } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && | ||
530 | irs+1 < irlast && (irs+1)->o == IR_HIOP) { | ||
531 | IRType t = IRT_I64; | ||
532 | if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP) | ||
533 | t = IRT_NUM; | ||
534 | if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { | ||
535 | uint64_t k = (uint32_t)T->ir[irs->op2].i + | ||
536 | ((uint64_t)T->ir[(irs+1)->op2].i << 32); | ||
537 | val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, | ||
538 | lj_ir_k64_find(J, k)); | ||
539 | } else { | ||
540 | val = emitir_raw(IRT(IR_HIOP, t), val, | ||
541 | snap_pref(J, T, map, nent, seen, (irs+1)->op2)); | ||
542 | } | ||
543 | tmp = emitir(IRT(irs->o, t), tmp, val); | ||
544 | continue; | ||
545 | } | ||
546 | tmp = emitir(irs->ot, tmp, val); | ||
547 | } | ||
548 | } | ||
549 | } | ||
550 | } | ||
551 | } | ||
414 | J->base = J->slot + J->baseslot; | 552 | J->base = J->slot + J->baseslot; |
415 | J->maxslot = snap->nslots - J->baseslot; | 553 | J->maxslot = snap->nslots - J->baseslot; |
416 | lj_snap_add(J); | 554 | lj_snap_add(J); |
555 | if (pass23) /* Need explicit GC step _after_ initial snapshot. */ | ||
556 | emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0); | ||
417 | } | 557 | } |
418 | 558 | ||
419 | /* -- Snapshot restore ---------------------------------------------------- */ | 559 | /* -- Snapshot restore ---------------------------------------------------- */ |
420 | 560 | ||
561 | static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, | ||
562 | SnapNo snapno, BloomFilter rfilt, | ||
563 | IRIns *ir, TValue *o); | ||
564 | |||
421 | /* Restore a value from the trace exit state. */ | 565 | /* Restore a value from the trace exit state. */ |
422 | static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, | 566 | static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, |
423 | SnapNo snapno, BloomFilter rfilt, | 567 | SnapNo snapno, BloomFilter rfilt, |
@@ -450,8 +594,12 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, | |||
450 | } | 594 | } |
451 | } else { /* Restore from register. */ | 595 | } else { /* Restore from register. */ |
452 | Reg r = regsp_reg(rs); | 596 | Reg r = regsp_reg(rs); |
453 | lua_assert(ra_hasreg(r)); | 597 | if (ra_noreg(r)) { |
454 | if (irt_isinteger(t)) { | 598 | lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); |
599 | snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o); | ||
600 | if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o)); | ||
601 | return; | ||
602 | } else if (irt_isinteger(t)) { | ||
455 | setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]); | 603 | setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]); |
456 | #if !LJ_SOFTFP | 604 | #if !LJ_SOFTFP |
457 | } else if (irt_isnum(t)) { | 605 | } else if (irt_isnum(t)) { |
@@ -468,6 +616,148 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, | |||
468 | } | 616 | } |
469 | } | 617 | } |
470 | 618 | ||
619 | #if LJ_HASFFI | ||
620 | /* Restore raw data from the trace exit state. | |
    | ** | |
    | ** Writes sz bytes (1, 2, 4 or 8) of the value of IR reference ref of | |
    | ** trace T to dst. The value is taken from the IR constant itself, from | |
    | ** a spill slot in ex or from a saved register in ex. snapno and rfilt | |
    | ** are only used to handle renamed references. | |
    | */ | |
621 | static void snap_restoredata(GCtrace *T, ExitState *ex, | |
622 | SnapNo snapno, BloomFilter rfilt, | |
623 | IRRef ref, void *dst, CTSize sz) | |
624 | { | |
625 | IRIns *ir = &T->ir[ref]; | |
626 | RegSP rs = ir->prev; | |
627 | int32_t *src; | |
628 | union { uint64_t u64; float f; } tmp; /* Scratch space for values that must be converted first. */ | |
629 | if (irref_isk(ref)) { /* Constant: the data comes from the IR, not from the exit state. */ | |
630 | if (ir->o == IR_KNUM || ir->o == IR_KINT64) { | |
631 | src = mref(ir->ptr, int32_t); /* These constants are read through ir->ptr. */ | |
632 | } else if (sz == 8) { | |
633 | tmp.u64 = (uint64_t)(uint32_t)ir->i; /* Zero-extend the 32 bit constant to 8 bytes. */ | |
634 | src = (int32_t *)&tmp.u64; | |
635 | } else { | |
636 | src = &ir->i; | |
637 | } | |
638 | } else { | |
639 | if (LJ_UNLIKELY(bloomtest(rfilt, ref))) /* Filter hit: ref may have been renamed. */ | |
640 | rs = snap_renameref(T, snapno, ref, rs); | |
641 | if (ra_hasspill(regsp_spill(rs))) { /* Value is in a spill slot. */ | |
642 | src = &ex->spill[regsp_spill(rs)]; | |
643 | } else { | |
644 | Reg r = regsp_reg(rs); | |
645 | if (ra_noreg(r)) { /* Neither spill slot nor register. */ | |
646 | /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */ | |
647 | lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); | |
648 | snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4); /* Restore the integer operand of the conversion ... */ | |
649 | *(lua_Number *)dst = (lua_Number)*(int32_t *)dst; /* ... and convert it to a number in place. */ | |
650 | return; | |
651 | } | |
652 | src = (int32_t *)&ex->gpr[r-RID_MIN_GPR]; /* Default: saved general-purpose register. */ | |
653 | #if !LJ_SOFTFP | |
654 | if (r >= RID_MAX_GPR) { /* Register number beyond the GPRs: use the saved FPR instead. */ | |
655 | src = (int32_t *)&ex->fpr[r-RID_MIN_FPR]; | |
656 | #if LJ_TARGET_PPC | |
657 | if (sz == 4) { /* PPC FPRs are always doubles. */ | |
658 | tmp.f = (float)*(double *)src; /* Narrow the double to the 4 byte float that was requested. */ | |
659 | src = (int32_t *)&tmp.f; | |
660 | } | |
661 | #else | |
662 | if (LJ_BE && sz == 4) src++; /* Big-endian: take the second 32 bit word of the FPR slot. */ | |
663 | #endif | |
664 | } | |
665 | #endif | |
666 | } | |
667 | } | |
668 | lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8); | |
669 | if (sz == 4) *(int32_t *)dst = *src; /* Copy sz bytes; 1 and 2 byte values are truncated from the 32 bit source word. */ | |
670 | else if (sz == 8) *(int64_t *)dst = *(int64_t *)src; | |
671 | else if (sz == 1) *(int8_t *)dst = (int8_t)*src; | |
672 | else *(int16_t *)dst = (int16_t)*src; | |
673 | } | |
674 | #endif | ||
675 | |||
676 | /* Unsink allocation from the trace exit state. Unsink sunk stores. | |
    | ** | |
    | ** ir is the sunk allocation (TNEW, TDUP, CNEW or CNEWI) of trace T. The | |
    | ** object is allocated now and stored to the slot o. Then all sunk stores | |
    | ** that belong to this allocation and lie before the snapshot snapno are | |
    | ** performed on the new object, with their values restored from ex. | |
    | */ | |
677 | static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, | |
678 | SnapNo snapno, BloomFilter rfilt, | |
679 | IRIns *ir, TValue *o) | |
680 | { | |
681 | lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || | |
682 | ir->o == IR_CNEW || ir->o == IR_CNEWI); | |
683 | #if LJ_HASFFI | |
684 | if (ir->o == IR_CNEW || ir->o == IR_CNEWI) { | |
685 | CTState *cts = ctype_ctsG(J2G(J)); | |
686 | CTypeID id = (CTypeID)T->ir[ir->op1].i; /* The C type ID is the constant referenced by op1. */ | |
687 | CTSize sz = lj_ctype_size(cts, id); | |
688 | GCcdata *cd = lj_cdata_new(cts, id, sz); /* Allocate the cdata object and put it into the slot. */ | |
689 | setcdataV(J->L, o, cd); | |
690 | if (ir->o == IR_CNEWI) { /* The initial value is an operand of the allocation itself. */ | |
691 | uint8_t *p = (uint8_t *)cdataptr(cd); | |
692 | lua_assert(sz == 4 || sz == 8); | |
693 | if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) { /* 32 bit target: 8 byte value split across CNEWI and a following HIOP. */ | |
694 | snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4); /* The HIOP operand goes to p+4 on little-endian, to p on big-endian. */ | |
695 | if (LJ_BE) p += 4; | |
696 | sz = 4; /* Only 4 bytes are left for the operand of the CNEWI. */ | |
697 | } | |
698 | snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz); | |
699 | } else { | |
700 | IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref]; /* Only instructions before the snapshot are considered. */ | |
701 | for (irs = ir+1; irs < irlast; irs++) | |
702 | if (irs->r == RID_SINK && ir + irs->s == irs) { /* Sunk store whose recorded distance points back to this allocation. */ | |
703 | IRIns *iro = &T->ir[T->ir[irs->op1].op2]; /* Constant offset operand of the address computation. */ | |
704 | uint8_t *p = (uint8_t *)cd; /* The offset is applied to the cdata object pointer. */ | |
705 | CTSize szs; | |
706 | lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD); | |
707 | lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64); | |
708 | if (irt_is64(irs->t)) szs = 8; /* The store size follows from the IR type of the store. */ | |
709 | else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1; | |
710 | else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2; | |
711 | else szs = 4; | |
712 | if (LJ_64 && iro->o == IR_KINT64) | |
713 | p += (int64_t)ir_k64(iro)->u64; | |
714 | else | |
715 | p += iro->i; | |
716 | lua_assert(p >= (uint8_t *)cdataptr(cd) && | |
717 | p + szs <= (uint8_t *)cdataptr(cd) + sz); | |
718 | if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { /* 32 bit target: a following HIOP carries a second 4 byte operand. */ | |
719 | lua_assert(szs == 4); | |
720 | snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4); | |
721 | if (LJ_BE) p += 4; | |
722 | } | |
723 | snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs); | |
724 | } | |
725 | } | |
726 | } else | |
727 | #endif | |
728 | { | |
729 | IRIns *irs, *irlast; | |
730 | GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) : /* TNEW: both operands are passed to lj_tab_new(). */ | |
731 | lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1])); /* TDUP: duplicate the constant table referenced by op1. */ | |
732 | settabV(J->L, o, t); | |
733 | irlast = &T->ir[T->snap[snapno].ref]; /* Only instructions before the snapshot are considered. */ | |
734 | for (irs = ir+1; irs < irlast; irs++) | |
735 | if (irs->r == RID_SINK && ir + irs->s == irs) { /* Sunk store whose recorded distance points back to this allocation. */ | |
736 | IRIns *irk = &T->ir[irs->op1]; /* The reference instruction of the store. */ | |
737 | TValue tmp, *val; | |
738 | lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || | |
739 | irs->o == IR_FSTORE); | |
740 | if (irk->o == IR_FREF) { /* Field store: asserted to target the metatable field. */ | |
741 | lua_assert(irk->op2 == IRFL_TAB_META); | |
742 | snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp); | |
743 | /* NOBARRIER: The table is new (marked white). */ | |
744 | setgcref(t->metatable, obj2gco(tabV(&tmp))); /* The restored value is used as a table here. */ | |
745 | } else { | |
746 | irk = &T->ir[irk->op2]; /* The key is the second operand of the reference. */ | |
747 | if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1]; /* For KSLOT the key constant itself is in op1. */ | |
748 | lj_ir_kvalue(J->L, &tmp, irk); /* Turn the key constant into a TValue. */ | |
749 | val = lj_tab_set(J->L, t, &tmp); /* Get the value slot for this key in the new table. */ | |
750 | /* NOBARRIER: The table is new (marked white). */ | |
751 | snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val); | |
752 | if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { /* Soft-float: a following HIOP supplies the hi word. */ | |
753 | snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp); | |
754 | val->u32.hi = tmp.u32.lo; | |
755 | } | |
756 | } | |
757 | } | |
758 | } | |
759 | } | |
760 | |||
471 | /* Restore interpreter state from exit state with the help of a snapshot. */ | 761 | /* Restore interpreter state from exit state with the help of a snapshot. */ |
472 | const BCIns *lj_snap_restore(jit_State *J, void *exptr) | 762 | const BCIns *lj_snap_restore(jit_State *J, void *exptr) |
473 | { | 763 | { |
@@ -500,10 +790,23 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) | |||
500 | SnapEntry sn = map[n]; | 790 | SnapEntry sn = map[n]; |
501 | if (!(sn & SNAP_NORESTORE)) { | 791 | if (!(sn & SNAP_NORESTORE)) { |
502 | TValue *o = &frame[snap_slot(sn)]; | 792 | TValue *o = &frame[snap_slot(sn)]; |
503 | snap_restoreval(J, T, ex, snapno, rfilt, snap_ref(sn), o); | 793 | IRRef ref = snap_ref(sn); |
794 | IRIns *ir = &T->ir[ref]; | ||
795 | if (ir->r == RID_SUNK) { | ||
796 | MSize j; | ||
797 | for (j = 0; j < n; j++) | ||
798 | if (snap_ref(map[j]) == ref) { /* De-duplicate sunk allocations. */ | ||
799 | copyTV(L, o, &frame[snap_slot(map[j])]); | ||
800 | goto dupslot; | ||
801 | } | ||
802 | snap_unsink(J, T, ex, snapno, rfilt, ir, o); | ||
803 | dupslot: | ||
804 | continue; | ||
805 | } | ||
806 | snap_restoreval(J, T, ex, snapno, rfilt, ref, o); | ||
504 | if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { | 807 | if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { |
505 | TValue tmp; | 808 | TValue tmp; |
506 | snap_restoreval(J, T, ex, snapno, rfilt, snap_ref(sn)+1, &tmp); | 809 | snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); |
507 | o->u32.hi = tmp.u32.lo; | 810 | o->u32.hi = tmp.u32.lo; |
508 | } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { | 811 | } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { |
509 | /* Overwrite tag with frame link. */ | 812 | /* Overwrite tag with frame link. */ |
@@ -528,5 +831,6 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) | |||
528 | 831 | ||
529 | #undef IR | 832 | #undef IR |
530 | #undef emitir_raw | 833 | #undef emitir_raw |
834 | #undef emitir | ||
531 | 835 | ||
532 | #endif | 836 | #endif |
diff --git a/src/lj_target.h b/src/lj_target.h index 13de8fc6..4808a38c 100644 --- a/src/lj_target.h +++ b/src/lj_target.h | |||
@@ -16,17 +16,19 @@ typedef uint32_t Reg; | |||
16 | 16 | ||
17 | /* The hi-bit is NOT set for an allocated register. This means the value | 17 | /* The hi-bit is NOT set for an allocated register. This means the value |
18 | ** can be directly used without masking. The hi-bit is set for a register | 18 | ** can be directly used without masking. The hi-bit is set for a register |
19 | ** allocation hint or for RID_INIT. | 19 | ** allocation hint or for RID_INIT, RID_SINK or RID_SUNK. |
20 | */ | 20 | */ |
21 | #define RID_NONE 0x80 | 21 | #define RID_NONE 0x80 |
22 | #define RID_MASK 0x7f | 22 | #define RID_MASK 0x7f |
23 | #define RID_INIT (RID_NONE|RID_MASK) | 23 | #define RID_INIT (RID_NONE|RID_MASK) |
24 | #define RID_SINK (RID_INIT-1) | ||
25 | #define RID_SUNK (RID_INIT-2) | ||
24 | 26 | ||
25 | #define ra_noreg(r) ((r) & RID_NONE) | 27 | #define ra_noreg(r) ((r) & RID_NONE) |
26 | #define ra_hasreg(r) (!((r) & RID_NONE)) | 28 | #define ra_hasreg(r) (!((r) & RID_NONE)) |
27 | 29 | ||
28 | /* The ra_hashint() macro assumes a previous test for ra_noreg(). */ | 30 | /* The ra_hashint() macro assumes a previous test for ra_noreg(). */ |
29 | #define ra_hashint(r) ((r) != RID_INIT) | 31 | #define ra_hashint(r) ((r) < RID_SUNK) |
30 | #define ra_gethint(r) ((Reg)((r) & RID_MASK)) | 32 | #define ra_gethint(r) ((Reg)((r) & RID_MASK)) |
31 | #define ra_sethint(rr, r) rr = (uint8_t)((r)|RID_NONE) | 33 | #define ra_sethint(rr, r) rr = (uint8_t)((r)|RID_NONE) |
32 | #define ra_samehint(r1, r2) (ra_gethint((r1)^(r2)) == 0) | 34 | #define ra_samehint(r1, r2) (ra_gethint((r1)^(r2)) == 0) |
diff --git a/src/lj_trace.c b/src/lj_trace.c index ad00dc67..240e7fc8 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c | |||
@@ -606,6 +606,7 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud) | |||
606 | J->loopref = J->chain[IR_LOOP]; /* Needed by assembler. */ | 606 | J->loopref = J->chain[IR_LOOP]; /* Needed by assembler. */ |
607 | } | 607 | } |
608 | lj_opt_split(J); | 608 | lj_opt_split(J); |
609 | lj_opt_sink(J); | ||
609 | J->state = LJ_TRACE_ASM; | 610 | J->state = LJ_TRACE_ASM; |
610 | break; | 611 | break; |
611 | 612 | ||
diff --git a/src/ljamalg.c b/src/ljamalg.c index 1b58ceb4..b1124464 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c | |||
@@ -64,6 +64,7 @@ | |||
64 | #include "lj_opt_dce.c" | 64 | #include "lj_opt_dce.c" |
65 | #include "lj_opt_loop.c" | 65 | #include "lj_opt_loop.c" |
66 | #include "lj_opt_split.c" | 66 | #include "lj_opt_split.c" |
67 | #include "lj_opt_sink.c" | ||
67 | #include "lj_mcode.c" | 68 | #include "lj_mcode.c" |
68 | #include "lj_snap.c" | 69 | #include "lj_snap.c" |
69 | #include "lj_record.c" | 70 | #include "lj_record.c" |