diff options
| author | Mike Pall <mike> | 2012-07-02 23:47:12 +0200 |
|---|---|---|
| committer | Mike Pall <mike> | 2012-07-02 23:47:12 +0200 |
| commit | 0af3f47ba01b9634f75a1aee38e318d74dab53d0 (patch) | |
| tree | 1ab064739041d279a54f89f396ae03bc9cf89864 | |
| parent | 79e1eaa73b1bb8dd0e2ea7aeaba8504f89e5ff94 (diff) | |
| download | luajit-0af3f47ba01b9634f75a1aee38e318d74dab53d0.tar.gz luajit-0af3f47ba01b9634f75a1aee38e318d74dab53d0.tar.bz2 luajit-0af3f47ba01b9634f75a1aee38e318d74dab53d0.zip | |
Add allocation sinking and store sinking optimization.
| -rw-r--r-- | src/Makefile | 2 | ||||
| -rw-r--r-- | src/Makefile.dep | 20 | ||||
| -rw-r--r-- | src/jit/dump.lua | 15 | ||||
| -rw-r--r-- | src/lj_asm.c | 67 | ||||
| -rw-r--r-- | src/lj_asm_arm.h | 49 | ||||
| -rw-r--r-- | src/lj_asm_mips.h | 43 | ||||
| -rw-r--r-- | src/lj_asm_ppc.h | 34 | ||||
| -rw-r--r-- | src/lj_asm_x86.h | 15 | ||||
| -rw-r--r-- | src/lj_iropt.h | 1 | ||||
| -rw-r--r-- | src/lj_jit.h | 9 | ||||
| -rw-r--r-- | src/lj_opt_sink.c | 244 | ||||
| -rw-r--r-- | src/lj_snap.c | 330 | ||||
| -rw-r--r-- | src/lj_target.h | 6 | ||||
| -rw-r--r-- | src/lj_trace.c | 1 | ||||
| -rw-r--r-- | src/ljamalg.c | 1 |
15 files changed, 749 insertions, 88 deletions
diff --git a/src/Makefile b/src/Makefile index 6e0c7463..13344a77 100644 --- a/src/Makefile +++ b/src/Makefile | |||
| @@ -443,7 +443,7 @@ LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \ | |||
| 443 | lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_api.o \ | 443 | lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_api.o \ |
| 444 | lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o \ | 444 | lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o \ |
| 445 | lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ | 445 | lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ |
| 446 | lj_opt_dce.o lj_opt_loop.o lj_opt_split.o \ | 446 | lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ |
| 447 | lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ | 447 | lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ |
| 448 | lj_asm.o lj_trace.o lj_gdbjit.o \ | 448 | lj_asm.o lj_trace.o lj_gdbjit.o \ |
| 449 | lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \ | 449 | lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \ |
diff --git a/src/Makefile.dep b/src/Makefile.dep index ff4492fb..1c7e5dc0 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep | |||
| @@ -142,6 +142,8 @@ lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | |||
| 142 | lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ | 142 | lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ |
| 143 | lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ | 143 | lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ |
| 144 | lj_dispatch.h lj_traceerr.h lj_vm.h | 144 | lj_dispatch.h lj_traceerr.h lj_vm.h |
| 145 | lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | ||
| 146 | lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h | ||
| 145 | lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ | 147 | lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ |
| 146 | lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \ | 148 | lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \ |
| 147 | lj_iropt.h lj_vm.h | 149 | lj_iropt.h lj_vm.h |
| @@ -153,8 +155,9 @@ lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | |||
| 153 | lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ | 155 | lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ |
| 154 | lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h | 156 | lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h |
| 155 | lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ | 157 | lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ |
| 156 | lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ | 158 | lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ |
| 157 | lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h lj_target_*.h | 159 | lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ |
| 160 | lj_target_*.h lj_ctype.h lj_cdata.h | ||
| 158 | lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 161 | lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
| 159 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \ | 162 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \ |
| 160 | lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \ | 163 | lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \ |
| @@ -188,12 +191,13 @@ ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ | |||
| 188 | lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \ | 191 | lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \ |
| 189 | lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h \ | 192 | lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h \ |
| 190 | lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \ | 193 | lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \ |
| 191 | lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_mcode.c lj_snap.c \ | 194 | lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \ |
| 192 | lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c lj_crecord.h \ | 195 | lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ |
| 193 | lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h lj_asm_*.h \ | 196 | lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ |
| 194 | lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c \ | 197 | lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ |
| 195 | lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c \ | 198 | lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ |
| 196 | lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c lib_init.c | 199 | lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \ |
| 200 | lib_init.c | ||
| 197 | luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h | 201 | luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h |
| 198 | host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ | 202 | host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ |
| 199 | lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ | 203 | lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ |
diff --git a/src/jit/dump.lua b/src/jit/dump.lua index 3d62c4ea..98933971 100644 --- a/src/jit/dump.lua +++ b/src/jit/dump.lua | |||
| @@ -374,10 +374,13 @@ local function dump_snap(tr) | |||
| 374 | end | 374 | end |
| 375 | 375 | ||
| 376 | -- Return a register name or stack slot for a rid/sp location. | 376 | -- Return a register name or stack slot for a rid/sp location. |
| 377 | local function ridsp_name(ridsp) | 377 | local function ridsp_name(ridsp, ins) |
| 378 | if not disass then disass = require("jit.dis_"..jit.arch) end | 378 | if not disass then disass = require("jit.dis_"..jit.arch) end |
| 379 | local rid = band(ridsp, 0xff) | 379 | local rid, slot = band(ridsp, 0xff), shr(ridsp, 8) |
| 380 | if ridsp > 255 then return format("[%x]", shr(ridsp, 8)*4) end | 380 | if rid == 253 or rid == 254 then |
| 381 | return slot == 0 and " {sink" or format(" {%04d", ins-slot) | ||
| 382 | end | ||
| 383 | if ridsp > 255 then return format("[%x]", slot*4) end | ||
| 381 | if rid < 128 then return disass.regname(rid) end | 384 | if rid < 128 then return disass.regname(rid) end |
| 382 | return "" | 385 | return "" |
| 383 | end | 386 | end |
| @@ -458,13 +461,15 @@ local function dump_ir(tr, dumpsnap, dumpreg) | |||
| 458 | end | 461 | end |
| 459 | elseif op ~= "NOP " and op ~= "CARG " and | 462 | elseif op ~= "NOP " and op ~= "CARG " and |
| 460 | (dumpreg or op ~= "RENAME") then | 463 | (dumpreg or op ~= "RENAME") then |
| 464 | local rid = band(ridsp, 255) | ||
| 461 | if dumpreg then | 465 | if dumpreg then |
| 462 | out:write(format("%04d %-5s ", ins, ridsp_name(ridsp))) | 466 | out:write(format("%04d %-6s", ins, ridsp_name(ridsp, ins))) |
| 463 | else | 467 | else |
| 464 | out:write(format("%04d ", ins)) | 468 | out:write(format("%04d ", ins)) |
| 465 | end | 469 | end |
| 466 | out:write(format("%s%s %s %s ", | 470 | out:write(format("%s%s %s %s ", |
| 467 | band(ot, 128) == 0 and " " or ">", | 471 | (rid == 254 or rid == 253) and "}" or |
| 472 | (band(ot, 128) == 0 and " " or ">"), | ||
| 468 | band(ot, 64) == 0 and " " or "+", | 473 | band(ot, 64) == 0 and " " or "+", |
| 469 | irtype[t], op)) | 474 | irtype[t], op)) |
| 470 | local m1, m2 = band(m, 3), band(m, 3*4) | 475 | local m1, m2 = band(m, 3), band(m, 3*4) |
diff --git a/src/lj_asm.c b/src/lj_asm.c index 9bce9292..8ff3eaf7 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
| @@ -782,19 +782,44 @@ static int asm_snap_canremat(ASMState *as) | |||
| 782 | static void asm_snap_alloc1(ASMState *as, IRRef ref) | 782 | static void asm_snap_alloc1(ASMState *as, IRRef ref) |
| 783 | { | 783 | { |
| 784 | IRIns *ir = IR(ref); | 784 | IRIns *ir = IR(ref); |
| 785 | if (!ra_used(ir)) { | 785 | if (!(ra_used(ir) || ir->r == RID_SUNK)) { |
| 786 | RegSet allow = (!LJ_SOFTFP && irt_isnum(ir->t)) ? RSET_FPR : RSET_GPR; | 786 | if (ir->r == RID_SINK) { |
| 787 | /* Get a weak register if we have a free one or can rematerialize. */ | 787 | ir->r = RID_SUNK; |
| 788 | if ((as->freeset & allow) || | 788 | #if LJ_HASFFI |
| 789 | (allow == RSET_FPR && asm_snap_canremat(as))) { | 789 | if (ir->o == IR_CNEWI) { /* Allocate CNEWI value. */ |
| 790 | Reg r = ra_allocref(as, ref, allow); /* Allocate a register. */ | 790 | asm_snap_alloc1(as, ir->op2); |
| 791 | if (!irt_isphi(ir->t)) | 791 | if (LJ_32 && (ir+1)->o == IR_HIOP) |
| 792 | ra_weak(as, r); /* But mark it as weakly referenced. */ | 792 | asm_snap_alloc1(as, (ir+1)->op2); |
| 793 | checkmclim(as); | 793 | } |
| 794 | RA_DBGX((as, "snapreg $f $r", ref, ir->r)); | 794 | #endif |
| 795 | else { /* Allocate stored values for TNEW, TDUP and CNEW. */ | ||
| 796 | IRIns *irs; | ||
| 797 | lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW); | ||
| 798 | for (irs = IR(as->curins); irs > ir; irs--) | ||
| 799 | if (irs->r == RID_SINK && ir + irs->s == irs) { | ||
| 800 | lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || | ||
| 801 | irs->o == IR_FSTORE || irs->o == IR_XSTORE); | ||
| 802 | asm_snap_alloc1(as, irs->op2); | ||
| 803 | if (LJ_32 && (irs+1)->o == IR_HIOP) | ||
| 804 | asm_snap_alloc1(as, (irs+1)->op2); | ||
| 805 | } | ||
| 806 | } | ||
| 807 | } else if (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT) { | ||
| 808 | asm_snap_alloc1(as, ir->op1); | ||
| 795 | } else { | 809 | } else { |
| 796 | ra_spill(as, ir); /* Otherwise force a spill slot. */ | 810 | RegSet allow = (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR; |
| 797 | RA_DBGX((as, "snapspill $f $s", ref, ir->s)); | 811 | if ((as->freeset & allow) || |
| 812 | (allow == RSET_FPR && asm_snap_canremat(as))) { | ||
| 813 | /* Get a weak register if we have a free one or can rematerialize. */ | ||
| 814 | Reg r = ra_allocref(as, ref, allow); /* Allocate a register. */ | ||
| 815 | if (!irt_isphi(ir->t)) | ||
| 816 | ra_weak(as, r); /* But mark it as weakly referenced. */ | ||
| 817 | checkmclim(as); | ||
| 818 | RA_DBGX((as, "snapreg $f $r", ref, ir->r)); | ||
| 819 | } else { | ||
| 820 | ra_spill(as, ir); /* Otherwise force a spill slot. */ | ||
| 821 | RA_DBGX((as, "snapspill $f $s", ref, ir->s)); | ||
| 822 | } | ||
| 798 | } | 823 | } |
| 799 | } | 824 | } |
| 800 | } | 825 | } |
| @@ -848,7 +873,7 @@ static void asm_snap_prep(ASMState *as) | |||
| 848 | { | 873 | { |
| 849 | if (as->curins < as->snapref) { | 874 | if (as->curins < as->snapref) { |
| 850 | do { | 875 | do { |
| 851 | lua_assert(as->snapno != 0); | 876 | if (as->snapno == 0) return; /* Called by sunk stores before snap #0. */ |
| 852 | as->snapno--; | 877 | as->snapno--; |
| 853 | as->snapref = as->T->snap[as->snapno].ref; | 878 | as->snapref = as->T->snap[as->snapno].ref; |
| 854 | } while (as->curins < as->snapref); | 879 | } while (as->curins < as->snapref); |
| @@ -1180,6 +1205,8 @@ static void asm_phi(ASMState *as, IRIns *ir) | |||
| 1180 | RegSet afree = (as->freeset & allow); | 1205 | RegSet afree = (as->freeset & allow); |
| 1181 | IRIns *irl = IR(ir->op1); | 1206 | IRIns *irl = IR(ir->op1); |
| 1182 | IRIns *irr = IR(ir->op2); | 1207 | IRIns *irr = IR(ir->op2); |
| 1208 | if (ir->r == RID_SINK) /* Sink PHI. */ | ||
| 1209 | return; | ||
| 1183 | /* Spill slot shuffling is not implemented yet (but rarely needed). */ | 1210 | /* Spill slot shuffling is not implemented yet (but rarely needed). */ |
| 1184 | if (ra_hasspill(irl->s) || ra_hasspill(irr->s)) | 1211 | if (ra_hasspill(irl->s) || ra_hasspill(irr->s)) |
| 1185 | lj_trace_err(as->J, LJ_TRERR_NYIPHI); | 1212 | lj_trace_err(as->J, LJ_TRERR_NYIPHI); |
| @@ -1494,7 +1521,7 @@ static void asm_tail_link(ASMState *as) | |||
| 1494 | /* -- Trace setup --------------------------------------------------------- */ | 1521 | /* -- Trace setup --------------------------------------------------------- */ |
| 1495 | 1522 | ||
| 1496 | /* Clear reg/sp for all instructions and add register hints. */ | 1523 | /* Clear reg/sp for all instructions and add register hints. */ |
| 1497 | static void asm_setup_regsp(ASMState *as) | 1524 | static void asm_setup_regsp(ASMState *as, int sink) |
| 1498 | { | 1525 | { |
| 1499 | GCtrace *T = as->T; | 1526 | GCtrace *T = as->T; |
| 1500 | IRRef nins = T->nins; | 1527 | IRRef nins = T->nins; |
| @@ -1545,6 +1572,14 @@ static void asm_setup_regsp(ASMState *as) | |||
| 1545 | inloop = 0; | 1572 | inloop = 0; |
| 1546 | as->evenspill = SPS_FIRST; | 1573 | as->evenspill = SPS_FIRST; |
| 1547 | for (lastir = IR(nins); ir < lastir; ir++) { | 1574 | for (lastir = IR(nins); ir < lastir; ir++) { |
| 1575 | if (sink) { | ||
| 1576 | if (ir->r == RID_SINK) | ||
| 1577 | continue; | ||
| 1578 | if (ir->r == RID_SUNK) { /* Revert after ASM restart. */ | ||
| 1579 | ir->r = RID_SINK; | ||
| 1580 | continue; | ||
| 1581 | } | ||
| 1582 | } | ||
| 1548 | switch (ir->o) { | 1583 | switch (ir->o) { |
| 1549 | case IR_LOOP: | 1584 | case IR_LOOP: |
| 1550 | inloop = 1; | 1585 | inloop = 1; |
| @@ -1716,6 +1751,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
| 1716 | ASMState as_; | 1751 | ASMState as_; |
| 1717 | ASMState *as = &as_; | 1752 | ASMState *as = &as_; |
| 1718 | MCode *origtop; | 1753 | MCode *origtop; |
| 1754 | int sink; | ||
| 1719 | 1755 | ||
| 1720 | /* Ensure an initialized instruction beyond the last one for HIOP checks. */ | 1756 | /* Ensure an initialized instruction beyond the last one for HIOP checks. */ |
| 1721 | J->cur.nins = lj_ir_nextins(J); | 1757 | J->cur.nins = lj_ir_nextins(J); |
| @@ -1736,6 +1772,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
| 1736 | as->mcp = as->mctop; | 1772 | as->mcp = as->mctop; |
| 1737 | as->mclim = as->mcbot + MCLIM_REDZONE; | 1773 | as->mclim = as->mcbot + MCLIM_REDZONE; |
| 1738 | asm_setup_target(as); | 1774 | asm_setup_target(as); |
| 1775 | sink = (IR(REF_BASE)->prev == 1); | ||
| 1739 | 1776 | ||
| 1740 | do { | 1777 | do { |
| 1741 | as->mcp = as->mctop; | 1778 | as->mcp = as->mctop; |
| @@ -1751,7 +1788,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T) | |||
| 1751 | as->gcsteps = 0; | 1788 | as->gcsteps = 0; |
| 1752 | as->sectref = as->loopref; | 1789 | as->sectref = as->loopref; |
| 1753 | as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED; | 1790 | as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED; |
| 1754 | asm_setup_regsp(as); | 1791 | asm_setup_regsp(as, sink); |
| 1755 | if (!as->loopref) | 1792 | if (!as->loopref) |
| 1756 | asm_tail_link(as); | 1793 | asm_tail_link(as); |
| 1757 | 1794 | ||
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index e6ab3573..19250254 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h | |||
| @@ -693,6 +693,8 @@ static void asm_newref(ASMState *as, IRIns *ir) | |||
| 693 | { | 693 | { |
| 694 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | 694 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; |
| 695 | IRRef args[3]; | 695 | IRRef args[3]; |
| 696 | if (ir->r == RID_SINK) /* Sink newref. */ | ||
| 697 | return; | ||
| 696 | args[0] = ASMREF_L; /* lua_State *L */ | 698 | args[0] = ASMREF_L; /* lua_State *L */ |
| 697 | args[1] = ir->op1; /* GCtab *t */ | 699 | args[1] = ir->op1; /* GCtab *t */ |
| 698 | args[2] = ASMREF_TMP1; /* cTValue *key */ | 700 | args[2] = ASMREF_TMP1; /* cTValue *key */ |
| @@ -836,9 +838,13 @@ static void asm_xload(ASMState *as, IRIns *ir) | |||
| 836 | 838 | ||
| 837 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | 839 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) |
| 838 | { | 840 | { |
| 839 | Reg src = ra_alloc1(as, ir->op2, RSET_GPR); | 841 | if (ir->r == RID_SINK) { /* Sink store. */ |
| 840 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | 842 | asm_snap_prep(as); |
| 841 | rset_exclude(RSET_GPR, src), ofs); | 843 | } else { |
| 844 | Reg src = ra_alloc1(as, ir->op2, RSET_GPR); | ||
| 845 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | ||
| 846 | rset_exclude(RSET_GPR, src), ofs); | ||
| 847 | } | ||
| 842 | } | 848 | } |
| 843 | 849 | ||
| 844 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 850 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
| @@ -876,21 +882,25 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) | |||
| 876 | 882 | ||
| 877 | static void asm_ahustore(ASMState *as, IRIns *ir) | 883 | static void asm_ahustore(ASMState *as, IRIns *ir) |
| 878 | { | 884 | { |
| 879 | RegSet allow = RSET_GPR; | 885 | if (ir->r == RID_SINK) { /* Sink store. */ |
| 880 | Reg idx, src = RID_NONE, type = RID_NONE; | 886 | asm_snap_prep(as); |
| 881 | int32_t ofs = 0; | 887 | } else { |
| 882 | int hiop = ((ir+1)->o == IR_HIOP); | 888 | RegSet allow = RSET_GPR; |
| 883 | if (!irt_ispri(ir->t)) { | 889 | Reg idx, src = RID_NONE, type = RID_NONE; |
| 884 | src = ra_alloc1(as, ir->op2, allow); | 890 | int32_t ofs = 0; |
| 885 | rset_clear(allow, src); | 891 | int hiop = ((ir+1)->o == IR_HIOP); |
| 892 | if (!irt_ispri(ir->t)) { | ||
| 893 | src = ra_alloc1(as, ir->op2, allow); | ||
| 894 | rset_clear(allow, src); | ||
| 895 | } | ||
| 896 | if (hiop) | ||
| 897 | type = ra_alloc1(as, (ir+1)->op2, allow); | ||
| 898 | else | ||
| 899 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | ||
| 900 | idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type)); | ||
| 901 | if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs); | ||
| 902 | emit_lso(as, ARMI_STR, type, idx, ofs+4); | ||
| 886 | } | 903 | } |
| 887 | if (hiop) | ||
| 888 | type = ra_alloc1(as, (ir+1)->op2, allow); | ||
| 889 | else | ||
| 890 | type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); | ||
| 891 | idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type)); | ||
| 892 | if (ra_hasreg(src)) emit_lso(as, ARMI_STR, src, idx, ofs); | ||
| 893 | emit_lso(as, ARMI_STR, type, idx, ofs+4); | ||
| 894 | } | 904 | } |
| 895 | 905 | ||
| 896 | static void asm_sload(ASMState *as, IRIns *ir) | 906 | static void asm_sload(ASMState *as, IRIns *ir) |
| @@ -1382,7 +1392,10 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
| 1382 | asm_fpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); | 1392 | asm_fpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); |
| 1383 | return; | 1393 | return; |
| 1384 | } else if ((ir-1)->o == IR_XSTORE) { | 1394 | } else if ((ir-1)->o == IR_XSTORE) { |
| 1385 | asm_xstore(as, ir, 4); | 1395 | if ((ir-1)->r == RID_SINK) |
| 1396 | asm_snap_prep(as); | ||
| 1397 | else | ||
| 1398 | asm_xstore(as, ir, 4); | ||
| 1386 | return; | 1399 | return; |
| 1387 | } | 1400 | } |
| 1388 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ | 1401 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ |
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index b42f9f9a..def3eb2a 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h | |||
| @@ -769,14 +769,18 @@ nolo: | |||
| 769 | 769 | ||
| 770 | static void asm_newref(ASMState *as, IRIns *ir) | 770 | static void asm_newref(ASMState *as, IRIns *ir) |
| 771 | { | 771 | { |
| 772 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | 772 | if (ir->r == RID_SINK) { /* Sink newref. */ |
| 773 | IRRef args[3]; | 773 | return; |
| 774 | args[0] = ASMREF_L; /* lua_State *L */ | 774 | } else { |
| 775 | args[1] = ir->op1; /* GCtab *t */ | 775 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; |
| 776 | args[2] = ASMREF_TMP1; /* cTValue *key */ | 776 | IRRef args[3]; |
| 777 | asm_setupresult(as, ir, ci); /* TValue * */ | 777 | args[0] = ASMREF_L; /* lua_State *L */ |
| 778 | asm_gencall(as, ci, args); | 778 | args[1] = ir->op1; /* GCtab *t */ |
| 779 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); | 779 | args[2] = ASMREF_TMP1; /* cTValue *key */ |
| 780 | asm_setupresult(as, ir, ci); /* TValue * */ | ||
| 781 | asm_gencall(as, ci, args); | ||
| 782 | asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); | ||
| 783 | } | ||
| 780 | } | 784 | } |
| 781 | 785 | ||
| 782 | static void asm_uref(ASMState *as, IRIns *ir) | 786 | static void asm_uref(ASMState *as, IRIns *ir) |
| @@ -912,9 +916,14 @@ static void asm_xload(ASMState *as, IRIns *ir) | |||
| 912 | 916 | ||
| 913 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | 917 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) |
| 914 | { | 918 | { |
| 915 | Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | 919 | if (ir->r == RID_SINK) { /* Sink store. */ |
| 916 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | 920 | asm_snap_prep(as); |
| 917 | rset_exclude(RSET_GPR, src), ofs); | 921 | return; |
| 922 | } else { | ||
| 923 | Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); | ||
| 924 | asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, | ||
| 925 | rset_exclude(RSET_GPR, src), ofs); | ||
| 926 | } | ||
| 918 | } | 927 | } |
| 919 | 928 | ||
| 920 | static void asm_ahuvload(ASMState *as, IRIns *ir) | 929 | static void asm_ahuvload(ASMState *as, IRIns *ir) |
| @@ -947,6 +956,10 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
| 947 | RegSet allow = RSET_GPR; | 956 | RegSet allow = RSET_GPR; |
| 948 | Reg idx, src = RID_NONE, type = RID_NONE; | 957 | Reg idx, src = RID_NONE, type = RID_NONE; |
| 949 | int32_t ofs = 0; | 958 | int32_t ofs = 0; |
| 959 | if (ir->r == RID_SINK) { /* Sink store. */ | ||
| 960 | asm_snap_prep(as); | ||
| 961 | return; | ||
| 962 | } | ||
| 950 | if (irt_isnum(ir->t)) { | 963 | if (irt_isnum(ir->t)) { |
| 951 | src = ra_alloc1(as, ir->op2, RSET_FPR); | 964 | src = ra_alloc1(as, ir->op2, RSET_FPR); |
| 952 | } else { | 965 | } else { |
| @@ -1561,8 +1574,12 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
| 1561 | return; | 1574 | return; |
| 1562 | } else if ((ir-1)->o == IR_XSTORE) { | 1575 | } else if ((ir-1)->o == IR_XSTORE) { |
| 1563 | as->curins--; /* Handle both stores here. */ | 1576 | as->curins--; /* Handle both stores here. */ |
| 1564 | asm_xstore(as, ir, LJ_LE ? 4 : 0); | 1577 | if ((ir-1)->r == RID_SINK) { |
| 1565 | asm_xstore(as, ir-1, LJ_LE ? 0 : 4); | 1578 | asm_snap_prep(as); |
| 1579 | } else { | ||
| 1580 | asm_xstore(as, ir, LJ_LE ? 4 : 0); | ||
| 1581 | asm_xstore(as, ir-1, LJ_LE ? 0 : 4); | ||
| 1582 | } | ||
| 1566 | return; | 1583 | return; |
| 1567 | } | 1584 | } |
| 1568 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ | 1585 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ |
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 5d538fc8..142ef212 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h | |||
| @@ -773,6 +773,8 @@ static void asm_newref(ASMState *as, IRIns *ir) | |||
| 773 | { | 773 | { |
| 774 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; | 774 | const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; |
| 775 | IRRef args[3]; | 775 | IRRef args[3]; |
| 776 | if (ir->r == RID_SINK) /* Sink newref. */ | ||
| 777 | return; | ||
| 776 | args[0] = ASMREF_L; /* lua_State *L */ | 778 | args[0] = ASMREF_L; /* lua_State *L */ |
| 777 | args[1] = ir->op1; /* GCtab *t */ | 779 | args[1] = ir->op1; /* GCtab *t */ |
| 778 | args[2] = ASMREF_TMP1; /* cTValue *key */ | 780 | args[2] = ASMREF_TMP1; /* cTValue *key */ |
| @@ -892,12 +894,16 @@ static void asm_fload(ASMState *as, IRIns *ir) | |||
| 892 | 894 | ||
| 893 | static void asm_fstore(ASMState *as, IRIns *ir) | 895 | static void asm_fstore(ASMState *as, IRIns *ir) |
| 894 | { | 896 | { |
| 895 | Reg src = ra_alloc1(as, ir->op2, RSET_GPR); | 897 | if (ir->r == RID_SINK) { /* Sink store. */ |
| 896 | IRIns *irf = IR(ir->op1); | 898 | asm_snap_prep(as); |
| 897 | Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); | 899 | } else { |
| 898 | int32_t ofs = field_ofs[irf->op2]; | 900 | Reg src = ra_alloc1(as, ir->op2, RSET_GPR); |
| 899 | PPCIns pi = asm_fxstoreins(ir); | 901 | IRIns *irf = IR(ir->op1); |
| 900 | emit_tai(as, pi, src, idx, ofs); | 902 | Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); |
| 903 | int32_t ofs = field_ofs[irf->op2]; | ||
| 904 | PPCIns pi = asm_fxstoreins(ir); | ||
| 905 | emit_tai(as, pi, src, idx, ofs); | ||
| 906 | } | ||
| 901 | } | 907 | } |
| 902 | 908 | ||
| 903 | static void asm_xload(ASMState *as, IRIns *ir) | 909 | static void asm_xload(ASMState *as, IRIns *ir) |
| @@ -912,6 +918,10 @@ static void asm_xload(ASMState *as, IRIns *ir) | |||
| 912 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) | 918 | static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) |
| 913 | { | 919 | { |
| 914 | IRIns *irb; | 920 | IRIns *irb; |
| 921 | if (ir->r == RID_SINK) { /* Sink store. */ | ||
| 922 | asm_snap_prep(as); | ||
| 923 | return; | ||
| 924 | } | ||
| 915 | if (ofs == 0 && mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP && | 925 | if (ofs == 0 && mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP && |
| 916 | ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) { | 926 | ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) { |
| 917 | /* Fuse BSWAP with XSTORE to stwbrx. */ | 927 | /* Fuse BSWAP with XSTORE to stwbrx. */ |
| @@ -968,6 +978,10 @@ static void asm_ahustore(ASMState *as, IRIns *ir) | |||
| 968 | RegSet allow = RSET_GPR; | 978 | RegSet allow = RSET_GPR; |
| 969 | Reg idx, src = RID_NONE, type = RID_NONE; | 979 | Reg idx, src = RID_NONE, type = RID_NONE; |
| 970 | int32_t ofs = AHUREF_LSX; | 980 | int32_t ofs = AHUREF_LSX; |
| 981 | if (ir->r == RID_SINK) { /* Sink store. */ | ||
| 982 | asm_snap_prep(as); | ||
| 983 | return; | ||
| 984 | } | ||
| 971 | if (irt_isnum(ir->t)) { | 985 | if (irt_isnum(ir->t)) { |
| 972 | src = ra_alloc1(as, ir->op2, RSET_FPR); | 986 | src = ra_alloc1(as, ir->op2, RSET_FPR); |
| 973 | } else { | 987 | } else { |
| @@ -1747,8 +1761,12 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
| 1747 | return; | 1761 | return; |
| 1748 | } else if ((ir-1)->o == IR_XSTORE) { | 1762 | } else if ((ir-1)->o == IR_XSTORE) { |
| 1749 | as->curins--; /* Handle both stores here. */ | 1763 | as->curins--; /* Handle both stores here. */ |
| 1750 | asm_xstore(as, ir, 0); | 1764 | if ((ir-1)->r == RID_SINK) { |
| 1751 | asm_xstore(as, ir-1, 4); | 1765 | asm_snap_prep(as); |
| 1766 | } else { | ||
| 1767 | asm_xstore(as, ir, 0); | ||
| 1768 | asm_xstore(as, ir-1, 4); | ||
| 1769 | } | ||
| 1752 | return; | 1770 | return; |
| 1753 | } | 1771 | } |
| 1754 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ | 1772 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ |
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 4537e1d5..ae14b3b6 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h | |||
| @@ -1155,6 +1155,8 @@ static void asm_newref(ASMState *as, IRIns *ir) | |||
| 1155 | IRRef args[3]; | 1155 | IRRef args[3]; |
| 1156 | IRIns *irkey; | 1156 | IRIns *irkey; |
| 1157 | Reg tmp; | 1157 | Reg tmp; |
| 1158 | if (ir->r == RID_SINK) /* Sink newref. */ | ||
| 1159 | return; | ||
| 1158 | args[0] = ASMREF_L; /* lua_State *L */ | 1160 | args[0] = ASMREF_L; /* lua_State *L */ |
| 1159 | args[1] = ir->op1; /* GCtab *t */ | 1161 | args[1] = ir->op1; /* GCtab *t */ |
| 1160 | args[2] = ASMREF_TMP1; /* cTValue *key */ | 1162 | args[2] = ASMREF_TMP1; /* cTValue *key */ |
| @@ -1259,6 +1261,10 @@ static void asm_fxstore(ASMState *as, IRIns *ir) | |||
| 1259 | RegSet allow = RSET_GPR; | 1261 | RegSet allow = RSET_GPR; |
| 1260 | Reg src = RID_NONE, osrc = RID_NONE; | 1262 | Reg src = RID_NONE, osrc = RID_NONE; |
| 1261 | int32_t k = 0; | 1263 | int32_t k = 0; |
| 1264 | if (ir->r == RID_SINK) { /* Sink store. */ | ||
| 1265 | asm_snap_prep(as); | ||
| 1266 | return; | ||
| 1267 | } | ||
| 1262 | /* The IRT_I16/IRT_U16 stores should never be simplified for constant | 1268 | /* The IRT_I16/IRT_U16 stores should never be simplified for constant |
| 1263 | ** values since mov word [mem], imm16 has a length-changing prefix. | 1269 | ** values since mov word [mem], imm16 has a length-changing prefix. |
| 1264 | */ | 1270 | */ |
| @@ -1372,6 +1378,10 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) | |||
| 1372 | 1378 | ||
| 1373 | static void asm_ahustore(ASMState *as, IRIns *ir) | 1379 | static void asm_ahustore(ASMState *as, IRIns *ir) |
| 1374 | { | 1380 | { |
| 1381 | if (ir->r == RID_SINK) { /* Sink store. */ | ||
| 1382 | asm_snap_prep(as); | ||
| 1383 | return; | ||
| 1384 | } | ||
| 1375 | if (irt_isnum(ir->t)) { | 1385 | if (irt_isnum(ir->t)) { |
| 1376 | Reg src = ra_alloc1(as, ir->op2, RSET_FPR); | 1386 | Reg src = ra_alloc1(as, ir->op2, RSET_FPR); |
| 1377 | asm_fuseahuref(as, ir->op1, RSET_GPR); | 1387 | asm_fuseahuref(as, ir->op1, RSET_GPR); |
| @@ -2251,7 +2261,10 @@ static void asm_hiop(ASMState *as, IRIns *ir) | |||
| 2251 | asm_comp_int64(as, ir); | 2261 | asm_comp_int64(as, ir); |
| 2252 | return; | 2262 | return; |
| 2253 | } else if ((ir-1)->o == IR_XSTORE) { | 2263 | } else if ((ir-1)->o == IR_XSTORE) { |
| 2254 | asm_fxstore(as, ir); | 2264 | if ((ir-1)->r == RID_SINK) |
| 2265 | asm_snap_prep(as); | ||
| 2266 | else | ||
| 2267 | asm_fxstore(as, ir); | ||
| 2255 | return; | 2268 | return; |
| 2256 | } | 2269 | } |
| 2257 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ | 2270 | if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ |
diff --git a/src/lj_iropt.h b/src/lj_iropt.h index 81d522e8..a17e2065 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h | |||
| @@ -154,6 +154,7 @@ LJ_FUNC void lj_opt_split(jit_State *J); | |||
| 154 | #else | 154 | #else |
| 155 | #define lj_opt_split(J) UNUSED(J) | 155 | #define lj_opt_split(J) UNUSED(J) |
| 156 | #endif | 156 | #endif |
| 157 | LJ_FUNC void lj_opt_sink(jit_State *J); | ||
| 157 | 158 | ||
| 158 | #endif | 159 | #endif |
| 159 | 160 | ||
diff --git a/src/lj_jit.h b/src/lj_jit.h index 28cdd17a..517b3264 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h | |||
| @@ -63,19 +63,20 @@ | |||
| 63 | #define JIT_F_OPT_NARROW 0x00200000 | 63 | #define JIT_F_OPT_NARROW 0x00200000 |
| 64 | #define JIT_F_OPT_LOOP 0x00400000 | 64 | #define JIT_F_OPT_LOOP 0x00400000 |
| 65 | #define JIT_F_OPT_ABC 0x00800000 | 65 | #define JIT_F_OPT_ABC 0x00800000 |
| 66 | #define JIT_F_OPT_FUSE 0x01000000 | 66 | #define JIT_F_OPT_SINK 0x01000000 |
| 67 | #define JIT_F_OPT_FUSE 0x02000000 | ||
| 67 | 68 | ||
| 68 | /* Optimization names for -O. Must match the order above. */ | 69 | /* Optimization names for -O. Must match the order above. */ |
| 69 | #define JIT_F_OPT_FIRST JIT_F_OPT_FOLD | 70 | #define JIT_F_OPT_FIRST JIT_F_OPT_FOLD |
| 70 | #define JIT_F_OPTSTRING \ | 71 | #define JIT_F_OPTSTRING \ |
| 71 | "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4fuse" | 72 | "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse" |
| 72 | 73 | ||
| 73 | /* Optimization levels set a fixed combination of flags. */ | 74 | /* Optimization levels set a fixed combination of flags. */ |
| 74 | #define JIT_F_OPT_0 0 | 75 | #define JIT_F_OPT_0 0 |
| 75 | #define JIT_F_OPT_1 (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE) | 76 | #define JIT_F_OPT_1 (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE) |
| 76 | #define JIT_F_OPT_2 (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP) | 77 | #define JIT_F_OPT_2 (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP) |
| 77 | #define JIT_F_OPT_3 \ | 78 | #define JIT_F_OPT_3 (JIT_F_OPT_2|\ |
| 78 | (JIT_F_OPT_2|JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_FUSE) | 79 | JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE) |
| 79 | #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 | 80 | #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 |
| 80 | 81 | ||
| 81 | #if LJ_TARGET_WINDOWS || LJ_64 | 82 | #if LJ_TARGET_WINDOWS || LJ_64 |
diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c new file mode 100644 index 00000000..80ab5b6e --- /dev/null +++ b/src/lj_opt_sink.c | |||
| @@ -0,0 +1,244 @@ | |||
| 1 | /* | ||
| 2 | ** SINK: Allocation Sinking and Store Sinking. | ||
| 3 | ** Copyright (C) 2005-2012 Mike Pall. See Copyright Notice in luajit.h | ||
| 4 | */ | ||
| 5 | |||
| 6 | #define lj_opt_sink_c | ||
| 7 | #define LUA_CORE | ||
| 8 | |||
| 9 | #include "lj_obj.h" | ||
| 10 | |||
| 11 | #if LJ_HASJIT | ||
| 12 | |||
| 13 | #include "lj_ir.h" | ||
| 14 | #include "lj_jit.h" | ||
| 15 | #include "lj_iropt.h" | ||
| 16 | #include "lj_target.h" | ||
| 17 | |||
| 18 | /* Some local macros to save typing. Undef'd at the end. */ | ||
| 19 | #define IR(ref) (&J->cur.ir[(ref)]) | ||
| 20 | |||
| 21 | /* Check whether the store ref points to an eligible allocation. Follows op1 of the store irs to its reference instruction (HREFK/AREF go through one extra level) and returns the TNEW/TDUP/CNEW that reference is based on, or NULL if the store cannot be attributed to such an allocation. */ | ||
| 22 | static IRIns *sink_checkalloc(jit_State *J, IRIns *irs) | ||
| 23 | { | ||
| 24 | IRIns *ir = IR(irs->op1); /* The reference instruction the store writes through. */ | ||
| 25 | if (!irref_isk(ir->op2)) | ||
| 26 | return NULL; /* Non-constant key. */ | ||
| 27 | if (ir->o == IR_HREFK || ir->o == IR_AREF) | ||
| 28 | ir = IR(ir->op1); /* Step over the intermediate instruction first. */ | ||
| 29 | else if (!(ir->o == IR_HREF || ir->o == IR_NEWREF || | ||
| 30 | ir->o == IR_FREF || ir->o == IR_ADD)) | ||
| 31 | return NULL; /* Unhandled reference type (for XSTORE). */ | ||
| 32 | ir = IR(ir->op1); /* Candidate allocation. */ | ||
| 33 | if (!(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW)) | ||
| 34 | return NULL; /* Not an allocation. */ | ||
| 35 | if (ir + 255 < irs) /* Allocation lies more than 255 instructions before the store. */ | ||
| 36 | return NULL; /* Out of range. NOTE(review): presumably the distance has to fit the slot part of REGSP(RID_SINK, delta) set by the sweep -- confirm. */ | ||
| 37 | return ir; /* Return allocation. */ | ||
| 38 | } | ||
| 39 | |||
| 40 | /* Recursively check whether a value depends on a PHI. Returns 1 if ref itself, or anything reachable through its op1/op2 operands, has the PHI flag set; returns 0 otherwise. NOTE(review): there is no memoization or work limit here, so shared operands are revisited on every path. */ | ||
| 41 | static int sink_phidep(jit_State *J, IRRef ref) | ||
| 42 | { | ||
| 43 | IRIns *ir = IR(ref); | ||
| 44 | if (irt_isphi(ir->t)) return 1; | ||
| 45 | if (ir->op1 >= REF_FIRST && sink_phidep(J, ir->op1)) return 1; /* Operands below REF_FIRST are not followed. */ | ||
| 46 | if (ir->op2 >= REF_FIRST && sink_phidep(J, ir->op2)) return 1; | ||
| 47 | return 0; | ||
| 48 | } | ||
| 49 | |||
| 50 | /* Check whether a value is a sinkable PHI or a non-PHI. ira is the allocation the value belongs to; its prev field is incremented as a PHI value count for every sinkable PHI found. Returns 0 only for a non-constant value that is not a PHI itself but depends on one. */ | ||
| 51 | static int sink_checkphi(jit_State *J, IRIns *ira, IRRef ref) | ||
| 52 | { | ||
| 53 | if (ref >= REF_FIRST) { | ||
| 54 | IRIns *ir = IR(ref); | ||
| 55 | if (irt_isphi(ir->t) || (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT && | ||
| 56 | irt_isphi(IR(ir->op1)->t))) { /* A PHI, or an int-to-num conversion of a PHI. */ | ||
| 57 | ira->prev++; /* Count this PHI value on the allocation. */ | ||
| 58 | return 1; /* Sinkable PHI. */ | ||
| 59 | } | ||
| 60 | return !sink_phidep(J, ref); /* Must be a non-PHI then. */ | ||
| 61 | } | ||
| 62 | return 1; /* Constant (non-PHI). */ | ||
| 63 | } | ||
| 64 | |||
| 65 | /* Mark non-sinkable allocations using single-pass backward propagation. | ||
| 66 | ** Walks the IR from the last instruction down to IR_BASE and sets the mark bit in the type field of everything that must not be sunk. | ||
| 67 | ** Roots for the marking process are: | ||
| 68 | ** - Some PHIs or snapshots (see below). | ||
| 69 | ** - Non-PHI, non-constant values stored to PHI allocations. | ||
| 70 | ** - All guards. | ||
| 71 | ** - Any remaining loads not eliminated by store-to-load forwarding. | ||
| 72 | ** - Stores with non-constant keys. | ||
| 73 | ** - All stored values. | ||
| 74 | */ | ||
| 75 | static void sink_mark_ins(jit_State *J) | ||
| 76 | { | ||
| 77 | IRIns *ir, *irlast = IR(J->cur.nins-1); | ||
| 78 | for (ir = irlast ; ; ir--) { /* No loop condition: terminated by the IR_BASE case. */ | ||
| 79 | switch (ir->o) { | ||
| 80 | case IR_BASE: | ||
| 81 | return; /* Finished. */ | ||
| 82 | case IR_CALLL: /* IRCALL_lj_tab_len */ | ||
| 83 | case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: | ||
| 84 | irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */ | ||
| 85 | break; | ||
| 86 | case IR_FLOAD: | ||
| 87 | if (irt_ismarked(ir->t) || ir->op2 == IRFL_TAB_META) /* Only if the load itself is marked or it reads the metatable field. */ | ||
| 88 | irt_setmark(IR(ir->op1)->t); /* Mark table for remaining loads. */ | ||
| 89 | break; | ||
| 90 | case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: { | ||
| 91 | IRIns *ira = sink_checkalloc(J, ir); | ||
| 92 | if (!ira || (irt_isphi(ira->t) && !sink_checkphi(J, ira, ir->op2))) | ||
| 93 | irt_setmark(IR(ir->op1)->t); /* Mark ineligible ref. */ | ||
| 94 | irt_setmark(IR(ir->op2)->t); /* Mark stored value. */ | ||
| 95 | break; | ||
| 96 | } | ||
| 97 | #if LJ_HASFFI | ||
| 98 | case IR_CNEWI: | ||
| 99 | if (irt_isphi(ir->t) && | ||
| 100 | (!sink_checkphi(J, ir, ir->op2) || | ||
| 101 | (LJ_32 && ir+1 < irlast && (ir+1)->o == IR_HIOP && | ||
| 102 | !sink_checkphi(J, ir, (ir+1)->op2)))) /* On 32 bit targets the value of a following HIOP is checked, too. */ | ||
| 103 | irt_setmark(ir->t); /* Mark ineligible allocation. */ | ||
| 104 | /* fallthrough */ | ||
| 105 | #endif | ||
| 106 | case IR_USTORE: | ||
| 107 | irt_setmark(IR(ir->op2)->t); /* Mark stored value. */ | ||
| 108 | break; | ||
| 109 | #if LJ_HASFFI | ||
| 110 | case IR_CALLXS: | ||
| 111 | #endif | ||
| 112 | case IR_CALLS: | ||
| 113 | irt_setmark(IR(ir->op1)->t); /* Mark (potentially) stored values. */ | ||
| 114 | break; | ||
| 115 | case IR_PHI: { | ||
| 116 | IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); | ||
| 117 | irl->prev = irr->prev = 0; /* Clear PHI value counts. */ | ||
| 118 | if (irl->o == irr->o && | ||
| 119 | (irl->o == IR_TNEW || irl->o == IR_TDUP || | ||
| 120 | (LJ_HASFFI && (irl->o == IR_CNEW || irl->o == IR_CNEWI)))) | ||
| 121 | break; /* Both operands are the same kind of allocation: leave them unmarked for now. */ | ||
| 122 | irt_setmark(irl->t); /* Any other PHI marks both of its operands. */ | ||
| 123 | irt_setmark(irr->t); | ||
| 124 | break; | ||
| 125 | } | ||
| 126 | default: | ||
| 127 | if (irt_ismarked(ir->t) || irt_isguard(ir->t)) { /* Propagate mark. */ | ||
| 128 | if (ir->op1 >= REF_FIRST) irt_setmark(IR(ir->op1)->t); | ||
| 129 | if (ir->op2 >= REF_FIRST) irt_setmark(IR(ir->op2)->t); | ||
| 130 | } | ||
| 131 | break; | ||
| 132 | } | ||
| 133 | } | ||
| 134 | } | ||
| 135 | |||
| 136 | /* Mark all instructions referenced by a snapshot. Sets the mark bit on every non-constant ref found in the map entries of snap. */ | ||
| 137 | static void sink_mark_snap(jit_State *J, SnapShot *snap) | ||
| 138 | { | ||
| 139 | SnapEntry *map = &J->cur.snapmap[snap->mapofs]; /* First map entry of this snapshot. */ | ||
| 140 | MSize n, nent = snap->nent; | ||
| 141 | for (n = 0; n < nent; n++) { | ||
| 142 | IRRef ref = snap_ref(map[n]); | ||
| 143 | if (!irref_isk(ref)) /* Constant refs are skipped. */ | ||
| 144 | irt_setmark(IR(ref)->t); | ||
| 145 | } | ||
| 146 | } | ||
| 147 | |||
| 148 | /* Iteratively remark PHI refs with differing marks or PHI value counts. Scans the trailing run of PHI instructions and marks both operands of every PHI whose operands disagree. The scan is repeated until a pass marks nothing new, because marking an operand can make a PHI visited earlier in the same pass disagree. */ | ||
| 149 | static void sink_remark_phi(jit_State *J) | ||
| 150 | { | ||
| 151 | IRIns *ir; | ||
| 152 | int remark; | ||
| 153 | do { | ||
| 154 | remark = 0; | ||
| 155 | for (ir = IR(J->cur.nins-1); ir->o == IR_PHI; ir--) { | ||
| 156 | IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); | ||
| 157 | if (!((irl->t.irt ^ irr->t.irt) & IRT_MARK) && irl->prev == irr->prev) | ||
| 158 | continue; /* Marks and PHI value counts agree: nothing to do. */ | ||
| 159 | /* Rescan whenever an operand gets newly marked, i.e. also when only the PHI value counts differ. */ | ||
| 160 | if (!(irl->t.irt & irr->t.irt & IRT_MARK)) remark = 1; | ||
| 161 | irt_setmark(irl->t); | ||
| 162 | irt_setmark(irr->t); | ||
| 163 | } | ||
| 164 | } while (remark); /* Terminates: every extra pass needs at least one newly set mark. */ | ||
| 165 | } | ||
| 166 | |||
| 167 | /* Sweep instructions and mark sunken allocations and stores. Walks backwards over all instructions and constants; unmarked allocations, their PHIs, their NEWREFs and the stores attributed to them get RID_SINK in the prev field, everything else gets REGSP_INIT and has its mark bit cleared. */ | ||
| 168 | static void sink_sweep_ins(jit_State *J) | ||
| 169 | { | ||
| 170 | IRIns *ir, *irfirst = IR(J->cur.nk); | ||
| 171 | for (ir = IR(J->cur.nins-1) ; ir >= irfirst; ir--) { | ||
| 172 | switch (ir->o) { | ||
| 173 | case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: { | ||
| 174 | IRIns *ira = sink_checkalloc(J, ir); | ||
| 175 | if (ira && !irt_ismarked(ira->t)) /* The allocation has a lower index, so its mark is not swept yet. */ | ||
| 176 | ir->prev = REGSP(RID_SINK, (int)(ir - ira)); /* Record the distance back to the allocation. */ | ||
| 177 | else | ||
| 178 | ir->prev = REGSP_INIT; | ||
| 179 | break; | ||
| 180 | } | ||
| 181 | case IR_NEWREF: | ||
| 182 | if (!irt_ismarked(IR(ir->op1)->t)) { /* Decide by the table's mark, not the NEWREF's own: the key ref is only sunk together with its table. */ | ||
| 183 | ir->prev = REGSP(RID_SINK, 0); | ||
| 184 | } else { | ||
| 185 | irt_clearmark(ir->t); | ||
| 186 | ir->prev = REGSP_INIT; | ||
| 187 | } | ||
| 188 | break; | ||
| 189 | #if LJ_HASFFI | ||
| 190 | case IR_CNEW: case IR_CNEWI: | ||
| 191 | #endif | ||
| 192 | case IR_TNEW: case IR_TDUP: | ||
| 193 | if (!irt_ismarked(ir->t)) { | ||
| 194 | ir->t.irt &= ~IRT_GUARD; /* A sunk allocation loses its guard flag. */ | ||
| 195 | ir->prev = REGSP(RID_SINK, 0); | ||
| 196 | } else { | ||
| 197 | irt_clearmark(ir->t); | ||
| 198 | ir->prev = REGSP_INIT; | ||
| 199 | } | ||
| 200 | break; | ||
| 201 | case IR_PHI: { | ||
| 202 | IRIns *ira = IR(ir->op2); /* Only the right operand is inspected here. */ | ||
| 203 | if (!irt_ismarked(ira->t) && | ||
| 204 | (ira->o == IR_TNEW || ira->o == IR_TDUP || | ||
| 205 | (LJ_HASFFI && (ira->o == IR_CNEW || ira->o == IR_CNEWI)))) { | ||
| 206 | ir->prev = REGSP(RID_SINK, 0); | ||
| 207 | } else { | ||
| 208 | ir->prev = REGSP_INIT; | ||
| 209 | } | ||
| 210 | break; | ||
| 211 | } | ||
| 212 | default: | ||
| 213 | irt_clearmark(ir->t); | ||
| 214 | ir->prev = REGSP_INIT; | ||
| 215 | break; | ||
| 216 | } | ||
| 217 | } | ||
| 218 | IR(REF_BASE)->prev = 1; /* Signal SINK flags to assembler. */ | ||
| 219 | } | ||
| 220 | |||
| 221 | /* Allocation sinking and store sinking. | ||
| 222 | ** Entry point of the pass. Does nothing unless all required optimization flags are set and an allocation chain is non-empty. | ||
| 223 | ** 1. Mark all non-sinkable allocations. | ||
| 224 | ** 2. Then sink all remaining allocations and the related stores. | ||
| 225 | */ | ||
| 226 | void lj_opt_sink(jit_State *J) | ||
| 227 | { | ||
| 228 | const uint32_t need = (JIT_F_OPT_SINK|JIT_F_OPT_FWD| | ||
| 229 | JIT_F_OPT_DCE|JIT_F_OPT_CSE|JIT_F_OPT_FOLD); | ||
| 230 | if ((J->flags & need) == need && /* Every flag in need must be enabled. */ | ||
| 231 | (J->chain[IR_TNEW] || J->chain[IR_TDUP] || | ||
| 232 | (LJ_HASFFI && (J->chain[IR_CNEW] || J->chain[IR_CNEWI])))) { | ||
| 233 | if (!J->loopref) /* No loop ref: the refs of the last snapshot are marking roots. */ | ||
| 234 | sink_mark_snap(J, &J->cur.snap[J->cur.nsnap-1]); | ||
| 235 | sink_mark_ins(J); | ||
| 236 | if (J->loopref) /* With a loop ref the PHI operands are reconciled after marking. */ | ||
| 237 | sink_remark_phi(J); | ||
| 238 | sink_sweep_ins(J); | ||
| 239 | } | ||
| 240 | } | ||
| 241 | |||
| 242 | #undef IR | ||
| 243 | |||
| 244 | #endif | ||
diff --git a/src/lj_snap.c b/src/lj_snap.c index 33edc8a6..1e6f10d0 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #if LJ_HASJIT | 11 | #if LJ_HASJIT |
| 12 | 12 | ||
| 13 | #include "lj_gc.h" | 13 | #include "lj_gc.h" |
| 14 | #include "lj_tab.h" | ||
| 14 | #include "lj_state.h" | 15 | #include "lj_state.h" |
| 15 | #include "lj_frame.h" | 16 | #include "lj_frame.h" |
| 16 | #include "lj_bc.h" | 17 | #include "lj_bc.h" |
| @@ -20,10 +21,17 @@ | |||
| 20 | #include "lj_trace.h" | 21 | #include "lj_trace.h" |
| 21 | #include "lj_snap.h" | 22 | #include "lj_snap.h" |
| 22 | #include "lj_target.h" | 23 | #include "lj_target.h" |
| 24 | #if LJ_HASFFI | ||
| 25 | #include "lj_ctype.h" | ||
| 26 | #include "lj_cdata.h" | ||
| 27 | #endif | ||
| 23 | 28 | ||
| 24 | /* Some local macros to save typing. Undef'd at the end. */ | 29 | /* Some local macros to save typing. Undef'd at the end. */ |
| 25 | #define IR(ref) (&J->cur.ir[(ref)]) | 30 | #define IR(ref) (&J->cur.ir[(ref)]) |
| 26 | 31 | ||
| 32 | /* Pass IR on to next optimization in chain (FOLD). */ | ||
| 33 | #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) | ||
| 34 | |||
| 27 | /* Emit raw IR without passing through optimizations. */ | 35 | /* Emit raw IR without passing through optimizations. */ |
| 28 | #define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J)) | 36 | #define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J)) |
| 29 | 37 | ||
| @@ -370,6 +378,31 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir) | |||
| 370 | } | 378 | } |
| 371 | } | 379 | } |
| 372 | 380 | ||
| 381 | /* De-duplicate parent reference. Scans the first nmax entries of the snapshot map for one that refers to ref and returns the TRef currently stored in that entry's slot, or 0 if no entry matches. */ | ||
| 382 | static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref) | ||
| 383 | { | ||
| 384 | MSize j; | ||
| 385 | for (j = 0; j < nmax; j++) | ||
| 386 | if (snap_ref(map[j]) == ref) | ||
| 387 | return J->slot[snap_slot(map[j])]; /* First match wins. */ | ||
| 388 | return 0; | ||
| 389 | } | ||
| 390 | |||
| 391 | /* Emit parent reference with de-duplication. Returns a TRef for ref of parent trace T: a replayed constant for constant refs, 0 if the parent instruction has no register or spill slot, an already emitted slot value if ref is found among the first nmax map entries, and a newly emitted PVAL otherwise. */ | ||
| 392 | static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax, | ||
| 393 | BloomFilter seen, IRRef ref) | ||
| 394 | { | ||
| 395 | IRIns *ir = &T->ir[ref]; | ||
| 396 | TRef tr; | ||
| 397 | if (irref_isk(ref)) | ||
| 398 | tr = snap_replay_const(J, ir); | ||
| 399 | else if (!regsp_used(ir->prev)) | ||
| 400 | tr = 0; /* Callers treat 0 as a value that needs separate handling. */ | ||
| 401 | else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0) /* The linear search only runs if the filter test hits. */ | ||
| 402 | tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0); | ||
| 403 | return tr; | ||
| 404 | } | ||
| 405 | |||
| 373 | /* Replay snapshot state to setup side trace. */ | 406 | /* Replay snapshot state to setup side trace. */ |
| 374 | void lj_snap_replay(jit_State *J, GCtrace *T) | 407 | void lj_snap_replay(jit_State *J, GCtrace *T) |
| 375 | { | 408 | { |
| @@ -377,6 +410,7 @@ void lj_snap_replay(jit_State *J, GCtrace *T) | |||
| 377 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 410 | SnapEntry *map = &T->snapmap[snap->mapofs]; |
| 378 | MSize n, nent = snap->nent; | 411 | MSize n, nent = snap->nent; |
| 379 | BloomFilter seen = 0; | 412 | BloomFilter seen = 0; |
| 413 | int pass23 = 0; | ||
| 380 | J->framedepth = 0; | 414 | J->framedepth = 0; |
| 381 | /* Emit IR for slots inherited from parent snapshot. */ | 415 | /* Emit IR for slots inherited from parent snapshot. */ |
| 382 | for (n = 0; n < nent; n++) { | 416 | for (n = 0; n < nent; n++) { |
| @@ -386,21 +420,18 @@ void lj_snap_replay(jit_State *J, GCtrace *T) | |||
| 386 | IRIns *ir = &T->ir[ref]; | 420 | IRIns *ir = &T->ir[ref]; |
| 387 | TRef tr; | 421 | TRef tr; |
| 388 | /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */ | 422 | /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */ |
| 389 | if (bloomtest(seen, ref)) { | 423 | if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0) |
| 390 | MSize j; | 424 | goto setslot; |
| 391 | for (j = 0; j < n; j++) | ||
| 392 | if (snap_ref(map[j]) == ref) { | ||
| 393 | tr = J->slot[snap_slot(map[j])]; | ||
| 394 | goto setslot; | ||
| 395 | } | ||
| 396 | } | ||
| 397 | bloomset(seen, ref); | 425 | bloomset(seen, ref); |
| 398 | if (irref_isk(ref)) { | 426 | if (irref_isk(ref)) { |
| 399 | tr = snap_replay_const(J, ir); | 427 | tr = snap_replay_const(J, ir); |
| 428 | } else if (!regsp_used(ir->prev)) { | ||
| 429 | pass23 = 1; | ||
| 430 | lua_assert(s != 0); | ||
| 431 | tr = s; | ||
| 400 | } else { | 432 | } else { |
| 401 | IRType t = irt_type(ir->t); | 433 | IRType t = irt_type(ir->t); |
| 402 | uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; | 434 | uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; |
| 403 | lua_assert(regsp_used(ir->prev)); | ||
| 404 | if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; | 435 | if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; |
| 405 | if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); | 436 | if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); |
| 406 | tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); | 437 | tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); |
| @@ -411,13 +442,126 @@ void lj_snap_replay(jit_State *J, GCtrace *T) | |||
| 411 | if ((sn & SNAP_FRAME)) | 442 | if ((sn & SNAP_FRAME)) |
| 412 | J->baseslot = s+1; | 443 | J->baseslot = s+1; |
| 413 | } | 444 | } |
| 445 | if (pass23) { | ||
| 446 | IRIns *irlast = &T->ir[(snap+1)->ref]; | ||
| 447 | lua_assert(J->exitno+1 < T->nsnap); | ||
| 448 | pass23 = 0; | ||
| 449 | /* Emit dependent PVALs. */ | ||
| 450 | for (n = 0; n < nent; n++) { | ||
| 451 | SnapEntry sn = map[n]; | ||
| 452 | IRRef refp = snap_ref(sn); | ||
| 453 | IRIns *ir = &T->ir[refp]; | ||
| 454 | if (regsp_reg(ir->r) == RID_SUNK) { | ||
| 455 | if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue; | ||
| 456 | pass23 = 1; | ||
| 457 | lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || | ||
| 458 | ir->o == IR_CNEW || ir->o == IR_CNEWI); | ||
| 459 | if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1); | ||
| 460 | if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2); | ||
| 461 | if (LJ_HASFFI && ir->o == IR_CNEWI) { | ||
| 462 | if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) | ||
| 463 | snap_pref(J, T, map, nent, seen, (ir+1)->op2); | ||
| 464 | } else { | ||
| 465 | IRIns *irs; | ||
| 466 | for (irs = ir+1; irs < irlast; irs++) | ||
| 467 | if (irs->r == RID_SINK && ir + irs->s == irs) { | ||
| 468 | if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) | ||
| 469 | snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); | ||
| 470 | else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && | ||
| 471 | irs+1 < irlast && (irs+1)->o == IR_HIOP) | ||
| 472 | snap_pref(J, T, map, nent, seen, (irs+1)->op2); | ||
| 473 | } | ||
| 474 | } | ||
| 475 | } else if (!irref_isk(refp) && !regsp_used(ir->prev)) { | ||
| 476 | lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); | ||
| 477 | J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1); | ||
| 478 | } | ||
| 479 | } | ||
| 480 | /* Replay sunk instructions. */ | ||
| 481 | for (n = 0; pass23 && n < nent; n++) { | ||
| 482 | SnapEntry sn = map[n]; | ||
| 483 | IRRef refp = snap_ref(sn); | ||
| 484 | IRIns *ir = &T->ir[refp]; | ||
| 485 | if (regsp_reg(ir->r) == RID_SUNK) { | ||
| 486 | TRef op1, op2; | ||
| 487 | if (J->slot[snap_slot(sn)] != snap_slot(sn)) { /* De-dup allocs. */ | ||
| 488 | J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]]; | ||
| 489 | continue; | ||
| 490 | } | ||
| 491 | op1 = ir->op1; | ||
| 492 | if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1); | ||
| 493 | op2 = ir->op2; | ||
| 494 | if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2); | ||
| 495 | if (LJ_HASFFI && ir->o == IR_CNEWI) { | ||
| 496 | if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) { | ||
| 497 | lj_needsplit(J); /* Emit joining HIOP. */ | ||
| 498 | op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2, | ||
| 499 | snap_pref(J, T, map, nent, seen, (ir+1)->op2)); | ||
| 500 | } | ||
| 501 | J->slot[snap_slot(sn)] = emitir(ir->ot, op1, op2); | ||
| 502 | } else { | ||
| 503 | IRIns *irs; | ||
| 504 | TRef tr = emitir(ir->ot, op1, op2); | ||
| 505 | J->slot[snap_slot(sn)] = tr; | ||
| 506 | for (irs = ir+1; irs < irlast; irs++) | ||
| 507 | if (irs->r == RID_SINK && ir + irs->s == irs) { | ||
| 508 | IRIns *irr = &T->ir[irs->op1]; | ||
| 509 | TRef val, key = irr->op2, tmp = tr; | ||
| 510 | if (irr->o != IR_FREF) { | ||
| 511 | IRIns *irk = &T->ir[key]; | ||
| 512 | if (irr->o == IR_HREFK) | ||
| 513 | key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]), | ||
| 514 | irk->op2); | ||
| 515 | else | ||
| 516 | key = snap_replay_const(J, irk); | ||
| 517 | if (irr->o == IR_HREFK || irr->o == IR_AREF) { | ||
| 518 | IRIns *irf = &T->ir[irr->op1]; | ||
| 519 | tmp = emitir(irf->ot, tmp, irf->op2); | ||
| 520 | } | ||
| 521 | } | ||
| 522 | tmp = emitir(irr->ot, tmp, key); | ||
| 523 | val = snap_pref(J, T, map, nent, seen, irs->op2); | ||
| 524 | if (val == 0) { | ||
| 525 | IRIns *irc = &T->ir[irs->op2]; | ||
| 526 | lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT); | ||
| 527 | val = snap_pref(J, T, map, nent, seen, irc->op1); | ||
| 528 | val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); | ||
| 529 | } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && | ||
| 530 | irs+1 < irlast && (irs+1)->o == IR_HIOP) { | ||
| 531 | IRType t = IRT_I64; | ||
| 532 | if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP) | ||
| 533 | t = IRT_NUM; | ||
| 534 | if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { | ||
| 535 | uint64_t k = (uint32_t)T->ir[irs->op2].i + | ||
| 536 | ((uint64_t)T->ir[(irs+1)->op2].i << 32); | ||
| 537 | val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, | ||
| 538 | lj_ir_k64_find(J, k)); | ||
| 539 | } else { | ||
| 540 | val = emitir_raw(IRT(IR_HIOP, t), val, | ||
| 541 | snap_pref(J, T, map, nent, seen, (irs+1)->op2)); | ||
| 542 | } | ||
| 543 | tmp = emitir(IRT(irs->o, t), tmp, val); | ||
| 544 | continue; | ||
| 545 | } | ||
| 546 | tmp = emitir(irs->ot, tmp, val); | ||
| 547 | } | ||
| 548 | } | ||
| 549 | } | ||
| 550 | } | ||
| 551 | } | ||
| 414 | J->base = J->slot + J->baseslot; | 552 | J->base = J->slot + J->baseslot; |
| 415 | J->maxslot = snap->nslots - J->baseslot; | 553 | J->maxslot = snap->nslots - J->baseslot; |
| 416 | lj_snap_add(J); | 554 | lj_snap_add(J); |
| 555 | if (pass23) /* Need explicit GC step _after_ initial snapshot. */ | ||
| 556 | emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0); | ||
| 417 | } | 557 | } |
| 418 | 558 | ||
| 419 | /* -- Snapshot restore ---------------------------------------------------- */ | 559 | /* -- Snapshot restore ---------------------------------------------------- */ |
| 420 | 560 | ||
| 561 | static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, | ||
| 562 | SnapNo snapno, BloomFilter rfilt, | ||
| 563 | IRIns *ir, TValue *o); | ||
| 564 | |||
| 421 | /* Restore a value from the trace exit state. */ | 565 | /* Restore a value from the trace exit state. */ |
| 422 | static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, | 566 | static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, |
| 423 | SnapNo snapno, BloomFilter rfilt, | 567 | SnapNo snapno, BloomFilter rfilt, |
| @@ -450,8 +594,12 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, | |||
| 450 | } | 594 | } |
| 451 | } else { /* Restore from register. */ | 595 | } else { /* Restore from register. */ |
| 452 | Reg r = regsp_reg(rs); | 596 | Reg r = regsp_reg(rs); |
| 453 | lua_assert(ra_hasreg(r)); | 597 | if (ra_noreg(r)) { |
| 454 | if (irt_isinteger(t)) { | 598 | lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); |
| 599 | snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o); | ||
| 600 | if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o)); | ||
| 601 | return; | ||
| 602 | } else if (irt_isinteger(t)) { | ||
| 455 | setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]); | 603 | setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]); |
| 456 | #if !LJ_SOFTFP | 604 | #if !LJ_SOFTFP |
| 457 | } else if (irt_isnum(t)) { | 605 | } else if (irt_isnum(t)) { |
| @@ -468,6 +616,148 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, | |||
| 468 | } | 616 | } |
| 469 | } | 617 | } |
| 470 | 618 | ||
| 619 | #if LJ_HASFFI | ||
| 620 | /* Restore raw data from the trace exit state. Copies sz bytes (1, 2, 4 or 8) of the value of ref in trace T to dst, taking it from the constant, from a spill slot or from a saved register of ex. */ | ||
| 621 | static void snap_restoredata(GCtrace *T, ExitState *ex, | ||
| 622 | SnapNo snapno, BloomFilter rfilt, | ||
| 623 | IRRef ref, void *dst, CTSize sz) | ||
| 624 | { | ||
| 625 | IRIns *ir = &T->ir[ref]; | ||
| 626 | RegSP rs = ir->prev; | ||
| 627 | int32_t *src; | ||
| 628 | union { uint64_t u64; float f; } tmp; /* Scratch for widened constants and narrowed floats. */ | ||
| 629 | if (irref_isk(ref)) { | ||
| 630 | if (ir->o == IR_KNUM || ir->o == IR_KINT64) { | ||
| 631 | src = mref(ir->ptr, int32_t); /* These constants are read through ir->ptr. */ | ||
| 632 | } else if (sz == 8) { | ||
| 633 | tmp.u64 = (uint64_t)(uint32_t)ir->i; /* Zero-extend the 32 bit constant. */ | ||
| 634 | src = (int32_t *)&tmp.u64; | ||
| 635 | } else { | ||
| 636 | src = &ir->i; | ||
| 637 | } | ||
| 638 | } else { | ||
| 639 | if (LJ_UNLIKELY(bloomtest(rfilt, ref))) /* Filter hit: the ref may have been renamed. */ | ||
| 640 | rs = snap_renameref(T, snapno, ref, rs); | ||
| 641 | if (ra_hasspill(regsp_spill(rs))) { | ||
| 642 | src = &ex->spill[regsp_spill(rs)]; /* A spill slot takes precedence over a register. */ | ||
| 643 | } else { | ||
| 644 | Reg r = regsp_reg(rs); | ||
| 645 | if (ra_noreg(r)) { | ||
| 646 | /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */ | ||
| 647 | lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); | ||
| 648 | snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4); /* Restore the integer operand of the conversion ... */ | ||
| 649 | *(lua_Number *)dst = (lua_Number)*(int32_t *)dst; /* ... and redo the conversion in place. */ | ||
| 650 | return; | ||
| 651 | } | ||
| 652 | src = (int32_t *)&ex->gpr[r-RID_MIN_GPR]; | ||
| 653 | #if !LJ_SOFTFP | ||
| 654 | if (r >= RID_MAX_GPR) { /* Register ids from RID_MAX_GPR up are read from the FPR save area. */ | ||
| 655 | src = (int32_t *)&ex->fpr[r-RID_MIN_FPR]; | ||
| 656 | #if LJ_TARGET_PPC | ||
| 657 | if (sz == 4) { /* PPC FPRs are always doubles. */ | ||
| 658 | tmp.f = (float)*(double *)src; | ||
| 659 | src = (int32_t *)&tmp.f; | ||
| 660 | } | ||
| 661 | #else | ||
| 662 | if (LJ_BE && sz == 4) src++; /* Big-endian: use the second 32 bit word of the saved FPR. */ | ||
| 663 | #endif | ||
| 664 | } | ||
| 665 | #endif | ||
| 666 | } | ||
| 667 | } | ||
| 668 | lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8); | ||
| 669 | if (sz == 4) *(int32_t *)dst = *src; | ||
| 670 | else if (sz == 8) *(int64_t *)dst = *(int64_t *)src; | ||
| 671 | else if (sz == 1) *(int8_t *)dst = (int8_t)*src; /* Narrow stores truncate the 32 bit source word. */ | ||
| 672 | else *(int16_t *)dst = (int16_t)*src; | ||
| 673 | } | ||
| 674 | #endif | ||
| 675 | |||
| 676 | /* Unsink allocation from the trace exit state. Unsink sunk stores. Creates the object described by the sunk allocation ir (table or cdata), stores it to o and then replays every sunk store between ir and the snapshot's IR ref that points back to ir. */ | ||
| 677 | static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, | ||
| 678 | SnapNo snapno, BloomFilter rfilt, | ||
| 679 | IRIns *ir, TValue *o) | ||
| 680 | { | ||
| 681 | lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || | ||
| 682 | ir->o == IR_CNEW || ir->o == IR_CNEWI); | ||
| 683 | #if LJ_HASFFI | ||
| 684 | if (ir->o == IR_CNEW || ir->o == IR_CNEWI) { | ||
| 685 | CTState *cts = ctype_ctsG(J2G(J)); | ||
| 686 | CTypeID id = (CTypeID)T->ir[ir->op1].i; /* op1 is a constant holding the ctype id. */ | ||
| 687 | CTSize sz = lj_ctype_size(cts, id); | ||
| 688 | GCcdata *cd = lj_cdata_new(cts, id, sz); | ||
| 689 | setcdataV(J->L, o, cd); | ||
| 690 | if (ir->o == IR_CNEWI) { /* The initializer is the second operand (plus HIOP on 32 bit). */ | ||
| 691 | uint8_t *p = (uint8_t *)cdataptr(cd); | ||
| 692 | lua_assert(sz == 4 || sz == 8); | ||
| 693 | if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) { | ||
| 694 | snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4); /* The HIOP operand is written as the high word. */ | ||
| 695 | if (LJ_BE) p += 4; | ||
| 696 | sz = 4; /* The remaining low word is restored below. */ | ||
| 697 | } | ||
| 698 | snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz); | ||
| 699 | } else { | ||
| 700 | IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref]; | ||
| 701 | for (irs = ir+1; irs < irlast; irs++) | ||
| 702 | if (irs->r == RID_SINK && ir + irs->s == irs) { /* A sunk store whose recorded distance leads back to this allocation. */ | ||
| 703 | IRIns *iro = &T->ir[T->ir[irs->op1].op2]; /* Constant offset operand of the ADD. */ | ||
| 704 | uint8_t *p = (uint8_t *)cd; /* The offset is applied to the start of the cdata object. */ | ||
| 705 | CTSize szs; | ||
| 706 | lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD); | ||
| 707 | lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64); | ||
| 708 | if (irt_is64(irs->t)) szs = 8; /* Store size follows from the store's IR type. */ | ||
| 709 | else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1; | ||
| 710 | else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2; | ||
| 711 | else szs = 4; | ||
| 712 | if (LJ_64 && iro->o == IR_KINT64) | ||
| 713 | p += (int64_t)ir_k64(iro)->u64; | ||
| 714 | else | ||
| 715 | p += iro->i; | ||
| 716 | lua_assert(p >= (uint8_t *)cdataptr(cd) && | ||
| 717 | p + szs <= (uint8_t *)cdataptr(cd) + sz); | ||
| 718 | if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { | ||
| 719 | lua_assert(szs == 4); | ||
| 720 | snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4); | ||
| 721 | if (LJ_BE) p += 4; | ||
| 722 | } | ||
| 723 | snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs); | ||
| 724 | } | ||
| 725 | } | ||
| 726 | } else | ||
| 727 | #endif | ||
| 728 | { | ||
| 729 | IRIns *irs, *irlast; | ||
| 730 | GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) : | ||
| 731 | lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1])); /* TNEW passes its operands on as sizes, TDUP copies the template table constant. */ | ||
| 732 | settabV(J->L, o, t); | ||
| 733 | irlast = &T->ir[T->snap[snapno].ref]; | ||
| 734 | for (irs = ir+1; irs < irlast; irs++) | ||
| 735 | if (irs->r == RID_SINK && ir + irs->s == irs) { /* A sunk store whose recorded distance leads back to this allocation. */ | ||
| 736 | IRIns *irk = &T->ir[irs->op1]; | ||
| 737 | TValue tmp, *val; | ||
| 738 | lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || | ||
| 739 | irs->o == IR_FSTORE); | ||
| 740 | if (irk->o == IR_FREF) { /* The only field store handled is the metatable. */ | ||
| 741 | lua_assert(irk->op2 == IRFL_TAB_META); | ||
| 742 | snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp); | ||
| 743 | /* NOBARRIER: The table is new (marked white). */ | ||
| 744 | setgcref(t->metatable, obj2gco(tabV(&tmp))); | ||
| 745 | } else { | ||
| 746 | irk = &T->ir[irk->op2]; /* Key operand of the reference. */ | ||
| 747 | if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1]; /* A KSLOT wraps the actual key constant. */ | ||
| 748 | lj_ir_kvalue(J->L, &tmp, irk); | ||
| 749 | val = lj_tab_set(J->L, t, &tmp); | ||
| 750 | /* NOBARRIER: The table is new (marked white). */ | ||
| 751 | snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val); | ||
| 752 | if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { | ||
| 753 | snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp); | ||
| 754 | val->u32.hi = tmp.u32.lo; /* The HIOP operand supplies the high word of the split number. */ | ||
| 755 | } | ||
| 756 | } | ||
| 757 | } | ||
| 758 | } | ||
| 759 | } | ||
| 760 | |||
| 471 | /* Restore interpreter state from exit state with the help of a snapshot. */ | 761 | /* Restore interpreter state from exit state with the help of a snapshot. */ |
| 472 | const BCIns *lj_snap_restore(jit_State *J, void *exptr) | 762 | const BCIns *lj_snap_restore(jit_State *J, void *exptr) |
| 473 | { | 763 | { |
| @@ -500,10 +790,23 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) | |||
| 500 | SnapEntry sn = map[n]; | 790 | SnapEntry sn = map[n]; |
| 501 | if (!(sn & SNAP_NORESTORE)) { | 791 | if (!(sn & SNAP_NORESTORE)) { |
| 502 | TValue *o = &frame[snap_slot(sn)]; | 792 | TValue *o = &frame[snap_slot(sn)]; |
| 503 | snap_restoreval(J, T, ex, snapno, rfilt, snap_ref(sn), o); | 793 | IRRef ref = snap_ref(sn); |
| 794 | IRIns *ir = &T->ir[ref]; | ||
| 795 | if (ir->r == RID_SUNK) { | ||
| 796 | MSize j; | ||
| 797 | for (j = 0; j < n; j++) | ||
| 798 | if (snap_ref(map[j]) == ref) { /* De-duplicate sunk allocations. */ | ||
| 799 | copyTV(L, o, &frame[snap_slot(map[j])]); | ||
| 800 | goto dupslot; | ||
| 801 | } | ||
| 802 | snap_unsink(J, T, ex, snapno, rfilt, ir, o); | ||
| 803 | dupslot: | ||
| 804 | continue; | ||
| 805 | } | ||
| 806 | snap_restoreval(J, T, ex, snapno, rfilt, ref, o); | ||
| 504 | if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { | 807 | if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { |
| 505 | TValue tmp; | 808 | TValue tmp; |
| 506 | snap_restoreval(J, T, ex, snapno, rfilt, snap_ref(sn)+1, &tmp); | 809 | snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); |
| 507 | o->u32.hi = tmp.u32.lo; | 810 | o->u32.hi = tmp.u32.lo; |
| 508 | } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { | 811 | } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { |
| 509 | /* Overwrite tag with frame link. */ | 812 | /* Overwrite tag with frame link. */ |
| @@ -528,5 +831,6 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) | |||
| 528 | 831 | ||
| 529 | #undef IR | 832 | #undef IR |
| 530 | #undef emitir_raw | 833 | #undef emitir_raw |
| 834 | #undef emitir | ||
| 531 | 835 | ||
| 532 | #endif | 836 | #endif |
diff --git a/src/lj_target.h b/src/lj_target.h index 13de8fc6..4808a38c 100644 --- a/src/lj_target.h +++ b/src/lj_target.h | |||
| @@ -16,17 +16,19 @@ typedef uint32_t Reg; | |||
| 16 | 16 | ||
| 17 | /* The hi-bit is NOT set for an allocated register. This means the value | 17 | /* The hi-bit is NOT set for an allocated register. This means the value |
| 18 | ** can be directly used without masking. The hi-bit is set for a register | 18 | ** can be directly used without masking. The hi-bit is set for a register |
| 19 | ** allocation hint or for RID_INIT. | 19 | ** allocation hint or for RID_INIT, RID_SINK or RID_SUNK. |
| 20 | */ | 20 | */ |
| 21 | #define RID_NONE 0x80 | 21 | #define RID_NONE 0x80 |
| 22 | #define RID_MASK 0x7f | 22 | #define RID_MASK 0x7f |
| 23 | #define RID_INIT (RID_NONE|RID_MASK) | 23 | #define RID_INIT (RID_NONE|RID_MASK) |
| 24 | #define RID_SINK (RID_INIT-1) | ||
| 25 | #define RID_SUNK (RID_INIT-2) | ||
| 24 | 26 | ||
| 25 | #define ra_noreg(r) ((r) & RID_NONE) | 27 | #define ra_noreg(r) ((r) & RID_NONE) |
| 26 | #define ra_hasreg(r) (!((r) & RID_NONE)) | 28 | #define ra_hasreg(r) (!((r) & RID_NONE)) |
| 27 | 29 | ||
| 28 | /* The ra_hashint() macro assumes a previous test for ra_noreg(). */ | 30 | /* The ra_hashint() macro assumes a previous test for ra_noreg(). */ |
| 29 | #define ra_hashint(r) ((r) != RID_INIT) | 31 | #define ra_hashint(r) ((r) < RID_SUNK) |
| 30 | #define ra_gethint(r) ((Reg)((r) & RID_MASK)) | 32 | #define ra_gethint(r) ((Reg)((r) & RID_MASK)) |
| 31 | #define ra_sethint(rr, r) rr = (uint8_t)((r)|RID_NONE) | 33 | #define ra_sethint(rr, r) rr = (uint8_t)((r)|RID_NONE) |
| 32 | #define ra_samehint(r1, r2) (ra_gethint((r1)^(r2)) == 0) | 34 | #define ra_samehint(r1, r2) (ra_gethint((r1)^(r2)) == 0) |
diff --git a/src/lj_trace.c b/src/lj_trace.c index ad00dc67..240e7fc8 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c | |||
| @@ -606,6 +606,7 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud) | |||
| 606 | J->loopref = J->chain[IR_LOOP]; /* Needed by assembler. */ | 606 | J->loopref = J->chain[IR_LOOP]; /* Needed by assembler. */ |
| 607 | } | 607 | } |
| 608 | lj_opt_split(J); | 608 | lj_opt_split(J); |
| 609 | lj_opt_sink(J); | ||
| 609 | J->state = LJ_TRACE_ASM; | 610 | J->state = LJ_TRACE_ASM; |
| 610 | break; | 611 | break; |
| 611 | 612 | ||
diff --git a/src/ljamalg.c b/src/ljamalg.c index 1b58ceb4..b1124464 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c | |||
| @@ -64,6 +64,7 @@ | |||
| 64 | #include "lj_opt_dce.c" | 64 | #include "lj_opt_dce.c" |
| 65 | #include "lj_opt_loop.c" | 65 | #include "lj_opt_loop.c" |
| 66 | #include "lj_opt_split.c" | 66 | #include "lj_opt_split.c" |
| 67 | #include "lj_opt_sink.c" | ||
| 67 | #include "lj_mcode.c" | 68 | #include "lj_mcode.c" |
| 68 | #include "lj_snap.c" | 69 | #include "lj_snap.c" |
| 69 | #include "lj_record.c" | 70 | #include "lj_record.c" |
