diff options
author | Mike Pall <mike> | 2010-01-26 21:49:04 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2010-01-26 21:49:04 +0100 |
commit | 67ca399a30cec05acacd7ea33d5cb0e361f92755 (patch) | |
tree | 335806ea53e2f98a71eb2395baa1d3d7bea352ae /src | |
parent | e058714a2e3745a819b77e6b50551e423897026a (diff) | |
download | luajit-67ca399a30cec05acacd7ea33d5cb0e361f92755.tar.gz luajit-67ca399a30cec05acacd7ea33d5cb0e361f92755.tar.bz2 luajit-67ca399a30cec05acacd7ea33d5cb0e361f92755.zip |
Compress snapshots using a simple, extensible 1D-compression.
Typically reduces storage overhead for snapshot maps by 60%.
The extensible format is a prerequisite for the next redesign steps:
Eliminate IR_FRAME and implement return-to-lower-frame.
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.dep | 6 | ||||
-rw-r--r-- | src/lib_jit.c | 21 | ||||
-rw-r--r-- | src/lj_asm.c | 101 | ||||
-rw-r--r-- | src/lj_gdbjit.c | 4 | ||||
-rw-r--r-- | src/lj_jit.h | 16 | ||||
-rw-r--r-- | src/lj_opt_dce.c | 6 | ||||
-rw-r--r-- | src/lj_opt_loop.c | 168 | ||||
-rw-r--r-- | src/lj_record.c | 97 | ||||
-rw-r--r-- | src/lj_snap.c | 247 | ||||
-rw-r--r-- | src/lj_snap.h | 13 | ||||
-rw-r--r-- | src/lj_trace.c | 5 |
11 files changed, 365 insertions, 319 deletions
diff --git a/src/Makefile.dep b/src/Makefile.dep index 779ee545..ffb7d79b 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep | |||
@@ -11,7 +11,7 @@ buildvm_lib.o: buildvm_lib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | |||
11 | buildvm_peobj.o: buildvm_peobj.c buildvm.h lj_def.h lua.h luaconf.h \ | 11 | buildvm_peobj.o: buildvm_peobj.c buildvm.h lj_def.h lua.h luaconf.h \ |
12 | lj_arch.h lj_bc.h | 12 | lj_arch.h lj_bc.h |
13 | lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ | 13 | lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ |
14 | lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_alloc.h | 14 | lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_lib.h lj_alloc.h |
15 | lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | 15 | lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ |
16 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \ | 16 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \ |
17 | lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_ctype.h lj_lib.h lj_libdef.h | 17 | lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_ctype.h lj_lib.h lj_libdef.h |
@@ -87,8 +87,8 @@ lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | |||
87 | lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \ | 87 | lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \ |
88 | lj_traceerr.h lj_vm.h lj_folddef.h | 88 | lj_traceerr.h lj_vm.h lj_folddef.h |
89 | lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 89 | lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
90 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h \ | 90 | lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ |
91 | lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h | 91 | lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h |
92 | lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 92 | lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
93 | lj_tab.h lj_ir.h lj_jit.h lj_iropt.h | 93 | lj_tab.h lj_ir.h lj_jit.h lj_iropt.h |
94 | lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ | 94 | lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ |
diff --git a/src/lib_jit.c b/src/lib_jit.c index aa421613..f3425d98 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c | |||
@@ -332,18 +332,25 @@ LJLIB_CF(jit_util_tracesnap) | |||
332 | if (T && sn < T->nsnap) { | 332 | if (T && sn < T->nsnap) { |
333 | SnapShot *snap = &T->snap[sn]; | 333 | SnapShot *snap = &T->snap[sn]; |
334 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 334 | SnapEntry *map = &T->snapmap[snap->mapofs]; |
335 | BCReg s, nslots = snap->nslots; | 335 | MSize n, nent = snap->nent; |
336 | BCReg nslots = snap->nslots; | ||
336 | GCtab *t; | 337 | GCtab *t; |
337 | lua_createtable(L, nslots ? (int)nslots : 1, 0); | 338 | lua_createtable(L, nslots ? (int)nslots : 1, 0); |
338 | t = tabV(L->top-1); | 339 | t = tabV(L->top-1); |
339 | setintV(lj_tab_setint(L, t, 0), (int32_t)snap->ref - REF_BIAS); | 340 | setintV(lj_tab_setint(L, t, 0), (int32_t)snap->ref - REF_BIAS); |
340 | for (s = 0; s < nslots; s++) { | 341 | /* NYI: get rid of this and expose the compressed slot map. */ |
341 | TValue *o = lj_tab_setint(L, t, (int32_t)(s+1)); | 342 | { |
342 | IRRef ref = snap_ref(map[s]); | 343 | BCReg s; |
343 | if (ref) | 344 | for (s = 0; s < nslots; s++) { |
344 | setintV(o, (int32_t)ref - REF_BIAS); | 345 | TValue *o = lj_tab_setint(L, t, (int32_t)(s+1)); |
345 | else | ||
346 | setboolV(o, 0); | 346 | setboolV(o, 0); |
347 | } | ||
348 | } | ||
349 | for (n = 0; n < nent; n++) { | ||
350 | BCReg s = snap_slot(map[n]); | ||
351 | IRRef ref = snap_ref(map[n]); | ||
352 | TValue *o = lj_tab_setint(L, t, (int32_t)(s+1)); | ||
353 | setintV(o, (int32_t)ref - REF_BIAS); | ||
347 | } | 354 | } |
348 | return 1; | 355 | return 1; |
349 | } | 356 | } |
diff --git a/src/lj_asm.c b/src/lj_asm.c index 9f5ce030..b3656e00 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
@@ -926,9 +926,9 @@ static void asm_snap_alloc(ASMState *as) | |||
926 | { | 926 | { |
927 | SnapShot *snap = &as->T->snap[as->snapno]; | 927 | SnapShot *snap = &as->T->snap[as->snapno]; |
928 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | 928 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; |
929 | BCReg s, nslots = snap->nslots; | 929 | MSize n, nent = snap->nent; |
930 | for (s = 0; s < nslots; s++) { | 930 | for (n = 0; n < nent; n++) { |
931 | IRRef ref = snap_ref(map[s]); | 931 | IRRef ref = snap_ref(map[n]); |
932 | if (!irref_isk(ref)) { | 932 | if (!irref_isk(ref)) { |
933 | IRIns *ir = IR(ref); | 933 | IRIns *ir = IR(ref); |
934 | if (!ra_used(ir) && ir->o != IR_FRAME) { | 934 | if (!ra_used(ir) && ir->o != IR_FRAME) { |
@@ -960,9 +960,9 @@ static int asm_snap_checkrename(ASMState *as, IRRef ren) | |||
960 | { | 960 | { |
961 | SnapShot *snap = &as->T->snap[as->snapno]; | 961 | SnapShot *snap = &as->T->snap[as->snapno]; |
962 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | 962 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; |
963 | BCReg s, nslots = snap->nslots; | 963 | MSize n, nent = snap->nent; |
964 | for (s = 0; s < nslots; s++) { | 964 | for (n = 0; n < nent; n++) { |
965 | IRRef ref = snap_ref(map[s]); | 965 | IRRef ref = snap_ref(map[n]); |
966 | if (ref == ren) { | 966 | if (ref == ren) { |
967 | IRIns *ir = IR(ref); | 967 | IRIns *ir = IR(ref); |
968 | ra_spill(as, ir); /* Register renamed, so force a spill slot. */ | 968 | ra_spill(as, ir); /* Register renamed, so force a spill slot. */ |
@@ -2465,18 +2465,17 @@ static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base) | |||
2465 | */ | 2465 | */ |
2466 | RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base); | 2466 | RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base); |
2467 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | 2467 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; |
2468 | BCReg s, nslots = snap->nslots; | 2468 | MSize n, nent = snap->nent; |
2469 | for (s = 0; s < nslots; s++) { | 2469 | for (n = 0; n < nent; n++) { |
2470 | IRRef ref = snap_ref(map[s]); | 2470 | IRRef ref = snap_ref(map[n]); |
2471 | if (!irref_isk(ref)) { | 2471 | if (!irref_isk(ref)) { |
2472 | int32_t ofs = 8*(int32_t)(snap_slot(map[n])-1); | ||
2472 | IRIns *ir = IR(ref); | 2473 | IRIns *ir = IR(ref); |
2473 | if (ir->o == IR_FRAME) { | 2474 | if (ir->o == IR_FRAME) { |
2474 | /* NYI: sync the frame, bump base, set topslot, clear new slots. */ | 2475 | /* NYI: sync the frame, bump base, set topslot, clear new slots. */ |
2475 | lj_trace_err(as->J, LJ_TRERR_NYIGCF); | 2476 | lj_trace_err(as->J, LJ_TRERR_NYIGCF); |
2476 | } else if (irt_isgcv(ir->t) && | 2477 | } else if (irt_isgcv(ir->t)) { |
2477 | !(ir->o == IR_SLOAD && ir->op1 < nslots && map[ir->op1] == 0)) { | ||
2478 | Reg src = ra_alloc1(as, ref, allow); | 2478 | Reg src = ra_alloc1(as, ref, allow); |
2479 | int32_t ofs = 8*(int32_t)(s-1); | ||
2480 | emit_movtomro(as, src, base, ofs); | 2479 | emit_movtomro(as, src, base, ofs); |
2481 | emit_movmroi(as, base, ofs+4, irt_toitype(ir->t)); | 2480 | emit_movmroi(as, base, ofs+4, irt_toitype(ir->t)); |
2482 | checkmclim(as); | 2481 | checkmclim(as); |
@@ -2504,7 +2503,7 @@ static void asm_gc_check(ASMState *as, SnapShot *snap) | |||
2504 | emit_loadi(as, tmp, (int32_t)as->gcsteps); | 2503 | emit_loadi(as, tmp, (int32_t)as->gcsteps); |
2505 | /* We don't know spadj yet, so get the C frame from L->cframe. */ | 2504 | /* We don't know spadj yet, so get the C frame from L->cframe. */ |
2506 | emit_movmroi(as, tmp, CFRAME_OFS_PC, | 2505 | emit_movmroi(as, tmp, CFRAME_OFS_PC, |
2507 | (int32_t)as->T->snapmap[snap->mapofs+snap->nslots]); | 2506 | (int32_t)as->T->snapmap[snap->mapofs+snap->nent]); |
2508 | emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK); | 2507 | emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK); |
2509 | lstate = IR(ASMREF_L)->r; | 2508 | lstate = IR(ASMREF_L)->r; |
2510 | emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe)); | 2509 | emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe)); |
@@ -2965,19 +2964,19 @@ static void asm_head_side(ASMState *as) | |||
2965 | static void asm_tail_sync(ASMState *as) | 2964 | static void asm_tail_sync(ASMState *as) |
2966 | { | 2965 | { |
2967 | SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */ | 2966 | SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */ |
2968 | BCReg s, nslots = snap->nslots; | 2967 | MSize n, nent = snap->nent; |
2969 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | 2968 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; |
2970 | SnapEntry *flinks = map + nslots + snap->nframelinks; | 2969 | SnapEntry *flinks = map + nent + snap->nframelinks; |
2971 | BCReg newbase = 0; | 2970 | BCReg newbase = 0; |
2972 | BCReg secondbase = ~(BCReg)0; | 2971 | BCReg nslots, topslot = 0; |
2973 | BCReg topslot = 0; | ||
2974 | 2972 | ||
2975 | checkmclim(as); | 2973 | checkmclim(as); |
2976 | ra_allocref(as, REF_BASE, RID2RSET(RID_BASE)); | 2974 | ra_allocref(as, REF_BASE, RID2RSET(RID_BASE)); |
2977 | 2975 | ||
2978 | /* Must check all frames to find topslot (outer can be larger than inner). */ | 2976 | /* Must check all frames to find topslot (outer can be larger than inner). */ |
2979 | for (s = 0; s < nslots; s++) { | 2977 | for (n = 0; n < nent; n++) { |
2980 | IRRef ref = snap_ref(map[s]); | 2978 | IRRef ref = snap_ref(map[n]); |
2979 | BCReg s = snap_slot(map[n]); | ||
2981 | if (!irref_isk(ref)) { | 2980 | if (!irref_isk(ref)) { |
2982 | IRIns *ir = IR(ref); | 2981 | IRIns *ir = IR(ref); |
2983 | if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { | 2982 | if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { |
@@ -2985,10 +2984,7 @@ static void asm_tail_sync(ASMState *as) | |||
2985 | if (isluafunc(fn)) { | 2984 | if (isluafunc(fn)) { |
2986 | BCReg fs = s + funcproto(fn)->framesize; | 2985 | BCReg fs = s + funcproto(fn)->framesize; |
2987 | if (fs > topslot) topslot = fs; | 2986 | if (fs > topslot) topslot = fs; |
2988 | if (s != 0) { | 2987 | newbase = s; |
2989 | newbase = s; | ||
2990 | if (secondbase == ~(BCReg)0) secondbase = s; | ||
2991 | } | ||
2992 | } | 2988 | } |
2993 | } | 2989 | } |
2994 | } | 2990 | } |
@@ -2998,7 +2994,7 @@ static void asm_tail_sync(ASMState *as) | |||
2998 | if (as->T->link == TRACE_INTERP) { | 2994 | if (as->T->link == TRACE_INTERP) { |
2999 | /* Setup fixed registers for exit to interpreter. */ | 2995 | /* Setup fixed registers for exit to interpreter. */ |
3000 | emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch); | 2996 | emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch); |
3001 | emit_loadi(as, RID_PC, (int32_t)map[nslots]); | 2997 | emit_loadi(as, RID_PC, (int32_t)map[nent]); |
3002 | } else if (newbase) { | 2998 | } else if (newbase) { |
3003 | /* Save modified BASE for linking to trace with higher start frame. */ | 2999 | /* Save modified BASE for linking to trace with higher start frame. */ |
3004 | emit_setgl(as, RID_BASE, jit_base); | 3000 | emit_setgl(as, RID_BASE, jit_base); |
@@ -3007,51 +3003,50 @@ static void asm_tail_sync(ASMState *as) | |||
3007 | emit_addptr(as, RID_BASE, 8*(int32_t)newbase); | 3003 | emit_addptr(as, RID_BASE, 8*(int32_t)newbase); |
3008 | 3004 | ||
3009 | /* Clear stack slots of newly added frames. */ | 3005 | /* Clear stack slots of newly added frames. */ |
3006 | nslots = snap->nslots; | ||
3010 | if (nslots <= topslot) { | 3007 | if (nslots <= topslot) { |
3011 | if (nslots < topslot) { | 3008 | if (nslots < topslot) { |
3009 | BCReg s; | ||
3012 | for (s = nslots; s <= topslot; s++) { | 3010 | for (s = nslots; s <= topslot; s++) { |
3013 | emit_movtomro(as, RID_EAX, RID_BASE, 8*(int32_t)s-4); | 3011 | emit_movtomro(as, RID_EAX, RID_BASE, 8*((int32_t)s-1)+4); |
3014 | checkmclim(as); | 3012 | checkmclim(as); |
3015 | } | 3013 | } |
3016 | emit_loadi(as, RID_EAX, LJ_TNIL); | 3014 | emit_loadi(as, RID_EAX, LJ_TNIL); |
3017 | } else { | 3015 | } else { |
3018 | emit_movmroi(as, RID_BASE, 8*(int32_t)nslots-4, LJ_TNIL); | 3016 | emit_movmroi(as, RID_BASE, 8*((int32_t)nslots-1)+4, LJ_TNIL); |
3019 | } | 3017 | } |
3020 | } | 3018 | } |
3021 | 3019 | ||
3022 | /* Store the value of all modified slots to the Lua stack. */ | 3020 | /* Store the value of all modified slots to the Lua stack. */ |
3023 | for (s = 0; s < nslots; s++) { | 3021 | for (n = 0; n < nent; n++) { |
3022 | BCReg s = snap_slot(map[n]); | ||
3024 | int32_t ofs = 8*((int32_t)s-1); | 3023 | int32_t ofs = 8*((int32_t)s-1); |
3025 | IRRef ref = snap_ref(map[s]); | 3024 | IRRef ref = snap_ref(map[n]); |
3026 | if (ref) { | 3025 | IRIns *ir = IR(ref); |
3027 | IRIns *ir = IR(ref); | 3026 | /* No need to restore readonly slots and unmodified non-parent slots. */ |
3028 | /* No need to restore readonly slots and unmodified non-parent slots. */ | 3027 | if (ir->o == IR_SLOAD && ir->op1 == s && |
3029 | if (ir->o == IR_SLOAD && ir->op1 == s && | 3028 | (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) |
3030 | (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) | 3029 | continue; |
3031 | continue; | 3030 | if (irt_isnum(ir->t)) { |
3032 | if (irt_isnum(ir->t)) { | 3031 | Reg src = ra_alloc1(as, ref, RSET_FPR); |
3033 | Reg src = ra_alloc1(as, ref, RSET_FPR); | 3032 | emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); |
3034 | emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); | 3033 | } else if (ir->o == IR_FRAME) { |
3035 | } else if (ir->o == IR_FRAME) { | 3034 | emit_movmroi(as, RID_BASE, ofs, ptr2addr(ir_kgc(IR(ir->op2)))); |
3036 | emit_movmroi(as, RID_BASE, ofs, ptr2addr(ir_kgc(IR(ir->op2)))); | 3035 | if (s != 0) /* Do not overwrite link to previous frame. */ |
3037 | if (s != 0) /* Do not overwrite link to previous frame. */ | 3036 | emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks)); |
3038 | emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks)); | ||
3039 | } else { | ||
3040 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t)); | ||
3041 | if (!irref_isk(ref)) { | ||
3042 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); | ||
3043 | emit_movtomro(as, src, RID_BASE, ofs); | ||
3044 | } else if (!irt_ispri(ir->t)) { | ||
3045 | emit_movmroi(as, RID_BASE, ofs, ir->i); | ||
3046 | } | ||
3047 | emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); | ||
3048 | } | ||
3049 | } else { | 3037 | } else { |
3050 | lua_assert(!(s > secondbase)); | 3038 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t)); |
3039 | if (!irref_isk(ref)) { | ||
3040 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); | ||
3041 | emit_movtomro(as, src, RID_BASE, ofs); | ||
3042 | } else if (!irt_ispri(ir->t)) { | ||
3043 | emit_movmroi(as, RID_BASE, ofs, ir->i); | ||
3044 | } | ||
3045 | emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); | ||
3051 | } | 3046 | } |
3052 | checkmclim(as); | 3047 | checkmclim(as); |
3053 | } | 3048 | } |
3054 | lua_assert(map + nslots == flinks-1); | 3049 | lua_assert(map + nent == flinks-1); |
3055 | } | 3050 | } |
3056 | 3051 | ||
3057 | /* Fixup the tail code. */ | 3052 | /* Fixup the tail code. */ |
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c index 4fce5eb9..345afb51 100644 --- a/src/lj_gdbjit.c +++ b/src/lj_gdbjit.c | |||
@@ -698,8 +698,8 @@ void lj_gdbjit_addtrace(jit_State *J, Trace *T, TraceNo traceno) | |||
698 | lua_State *L = J->L; | 698 | lua_State *L = J->L; |
699 | GCproto *pt = &gcref(T->startpt)->pt; | 699 | GCproto *pt = &gcref(T->startpt)->pt; |
700 | TraceNo parent = T->ir[REF_BASE].op1; | 700 | TraceNo parent = T->ir[REF_BASE].op1; |
701 | uintptr_t pcofs = (uintptr_t)(T->snap[0].mapofs+T->snap[0].nslots); | 701 | uintptr_t pcofs = (uintptr_t)(T->snap[0].mapofs+T->snap[0].nent); |
702 | const BCIns *startpc = (const BCIns *)(uintptr_t)T->snapmap[pcofs]; | 702 | const BCIns *startpc = snap_pc(T->snapmap[pcofs]); |
703 | ctx.T = T; | 703 | ctx.T = T; |
704 | ctx.mcaddr = (uintptr_t)T->mcode; | 704 | ctx.mcaddr = (uintptr_t)T->mcode; |
705 | ctx.szmcode = T->szmcode; | 705 | ctx.szmcode = T->szmcode; |
diff --git a/src/lj_jit.h b/src/lj_jit.h index 36e60113..1a1e407a 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h | |||
@@ -112,17 +112,27 @@ typedef uint8_t MCode; | |||
112 | typedef struct SnapShot { | 112 | typedef struct SnapShot { |
113 | uint16_t mapofs; /* Offset into snapshot map. */ | 113 | uint16_t mapofs; /* Offset into snapshot map. */ |
114 | IRRef1 ref; /* First IR ref for this snapshot. */ | 114 | IRRef1 ref; /* First IR ref for this snapshot. */ |
115 | uint8_t nslots; /* Number of stack slots. */ | 115 | uint8_t nslots; /* Number of valid slots. */ |
116 | uint8_t nent; /* Number of compressed entries. */ | ||
116 | uint8_t nframelinks; /* Number of frame links. */ | 117 | uint8_t nframelinks; /* Number of frame links. */ |
117 | uint8_t count; /* Count of taken exits for this snapshot. */ | 118 | uint8_t count; /* Count of taken exits for this snapshot. */ |
118 | uint8_t unused1; | ||
119 | } SnapShot; | 119 | } SnapShot; |
120 | 120 | ||
121 | #define SNAPCOUNT_DONE 255 /* Already compiled and linked a side trace. */ | 121 | #define SNAPCOUNT_DONE 255 /* Already compiled and linked a side trace. */ |
122 | 122 | ||
123 | /* Snapshot entry. */ | 123 | /* Compressed snapshot entry. */ |
124 | typedef uint32_t SnapEntry; | 124 | typedef uint32_t SnapEntry; |
125 | |||
126 | #define SNAP_FRAME 0x010000 /* Slot has frame link. */ | ||
127 | |||
128 | #define SNAP(slot, flags, ref) ((SnapEntry)((slot) << 24) + (flags) + (ref)) | ||
129 | #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) | ||
130 | #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) | ||
125 | #define snap_ref(sn) ((sn) & 0xffff) | 131 | #define snap_ref(sn) ((sn) & 0xffff) |
132 | #define snap_slot(sn) ((BCReg)((sn) >> 24)) | ||
133 | #define snap_isframe(sn) ((sn) & SNAP_FRAME) | ||
134 | #define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn)) | ||
135 | #define snap_setref(sn, ref) (((sn) & 0xffff0000) | (ref)) | ||
126 | 136 | ||
127 | /* Snapshot and exit numbers. */ | 137 | /* Snapshot and exit numbers. */ |
128 | typedef uint32_t SnapNo; | 138 | typedef uint32_t SnapNo; |
diff --git a/src/lj_opt_dce.c b/src/lj_opt_dce.c index 636d5183..90e81526 100644 --- a/src/lj_opt_dce.c +++ b/src/lj_opt_dce.c | |||
@@ -24,9 +24,9 @@ static void dce_marksnap(jit_State *J) | |||
24 | for (i = 0; i < nsnap; i++) { | 24 | for (i = 0; i < nsnap; i++) { |
25 | SnapShot *snap = &J->cur.snap[i]; | 25 | SnapShot *snap = &J->cur.snap[i]; |
26 | SnapEntry *map = &J->cur.snapmap[snap->mapofs]; | 26 | SnapEntry *map = &J->cur.snapmap[snap->mapofs]; |
27 | BCReg s, nslots = snap->nslots; | 27 | MSize n, nent = snap->nent; |
28 | for (s = 0; s < nslots; s++) { | 28 | for (n = 0; n < nent; n++) { |
29 | IRRef ref = snap_ref(map[s]); | 29 | IRRef ref = snap_ref(map[n]); |
30 | if (!irref_isk(ref)) | 30 | if (!irref_isk(ref)) |
31 | irt_setmark(IR(ref)->t); | 31 | irt_setmark(IR(ref)->t); |
32 | } | 32 | } |
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index f2950fe9..e5ad5b43 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c | |||
@@ -10,7 +10,6 @@ | |||
10 | 10 | ||
11 | #if LJ_HASJIT | 11 | #if LJ_HASJIT |
12 | 12 | ||
13 | #include "lj_gc.h" | ||
14 | #include "lj_err.h" | 13 | #include "lj_err.h" |
15 | #include "lj_str.h" | 14 | #include "lj_str.h" |
16 | #include "lj_ir.h" | 15 | #include "lj_ir.h" |
@@ -163,21 +162,69 @@ static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi) | |||
163 | 162 | ||
164 | /* -- Loop unrolling using copy-substitution ------------------------------ */ | 163 | /* -- Loop unrolling using copy-substitution ------------------------------ */ |
165 | 164 | ||
165 | /* Copy-substitute snapshot. */ | ||
166 | static void loop_subst_snap(jit_State *J, SnapShot *osnap, | ||
167 | SnapEntry *loopmap, IRRef1 *subst) | ||
168 | { | ||
169 | SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs]; | ||
170 | MSize nmapofs, nframelinks; | ||
171 | MSize on, ln, nn, onent = osnap->nent; | ||
172 | BCReg nslots = osnap->nslots; | ||
173 | SnapShot *snap = &J->cur.snap[J->cur.nsnap]; | ||
174 | if (irt_isguard(J->guardemit)) { /* Guard inbetween? */ | ||
175 | nmapofs = J->cur.nsnapmap; | ||
176 | J->cur.nsnap++; /* Add new snapshot. */ | ||
177 | } else { /* Otherwise overwrite previous snapshot. */ | ||
178 | snap--; | ||
179 | nmapofs = snap->mapofs; | ||
180 | } | ||
181 | J->guardemit.irt = 0; | ||
182 | nframelinks = osnap->nframelinks; | ||
183 | /* Setup new snapshot. */ | ||
184 | snap->mapofs = (uint16_t)nmapofs; | ||
185 | snap->ref = (IRRef1)J->cur.nins; | ||
186 | snap->nframelinks = (uint8_t)nframelinks; | ||
187 | snap->nslots = nslots; | ||
188 | snap->count = 0; | ||
189 | nmap = &J->cur.snapmap[nmapofs]; | ||
190 | /* Substitute snapshot slots. */ | ||
191 | on = ln = nn = 0; | ||
192 | while (on < onent) { | ||
193 | SnapEntry osn = omap[on], lsn = loopmap[ln]; | ||
194 | if (snap_slot(lsn) < snap_slot(osn)) { /* Copy slot from loop map. */ | ||
195 | nmap[nn++] = lsn; | ||
196 | ln++; | ||
197 | } else { /* Copy substituted slot from snapshot map. */ | ||
198 | if (snap_slot(lsn) == snap_slot(osn)) ln++; /* Shadowed loop slot. */ | ||
199 | if (!irref_isk(snap_ref(osn))) | ||
200 | osn = snap_setref(osn, subst[snap_ref(osn)]); | ||
201 | nmap[nn++] = osn; | ||
202 | on++; | ||
203 | } | ||
204 | } | ||
205 | while (snap_slot(loopmap[ln]) < nslots) /* Copy remaining loop slots. */ | ||
206 | nmap[nn++] = loopmap[ln++]; | ||
207 | snap->nent = (uint8_t)nn; | ||
208 | J->cur.nsnapmap = (uint16_t)(nmapofs + nn + nframelinks); | ||
209 | omap += onent; | ||
210 | nmap += nn; | ||
211 | for (nn = 0; nn < nframelinks; nn++) /* Copy frame links. */ | ||
212 | nmap[nn] = omap[nn]; | ||
213 | } | ||
214 | |||
166 | /* Unroll loop. */ | 215 | /* Unroll loop. */ |
167 | static void loop_unroll(jit_State *J) | 216 | static void loop_unroll(jit_State *J) |
168 | { | 217 | { |
169 | IRRef1 phi[LJ_MAX_PHI]; | 218 | IRRef1 phi[LJ_MAX_PHI]; |
170 | uint32_t nphi = 0; | 219 | uint32_t nphi = 0; |
171 | IRRef1 *subst; | 220 | IRRef1 *subst; |
172 | SnapShot *osnap, *snap; | 221 | SnapShot *osnap; |
173 | SnapEntry *loopmap; | 222 | SnapEntry *loopmap, *psentinel; |
174 | BCReg loopslots; | 223 | IRRef ins, invar; |
175 | MSize nsnap, nsnapmap; | ||
176 | IRRef ins, invar, osnapref; | ||
177 | 224 | ||
178 | /* Use temp buffer for substitution table. | 225 | /* Use temp buffer for substitution table. |
179 | ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. | 226 | ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. |
180 | ** Note: don't call into the VM or run the GC or the buffer may be gone. | 227 | ** Caveat: don't call into the VM or run the GC or the buffer may be gone. |
181 | */ | 228 | */ |
182 | invar = J->cur.nins; | 229 | invar = J->cur.nins; |
183 | subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, | 230 | subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, |
@@ -187,80 +234,37 @@ static void loop_unroll(jit_State *J) | |||
187 | /* LOOP separates the pre-roll from the loop body. */ | 234 | /* LOOP separates the pre-roll from the loop body. */ |
188 | emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0); | 235 | emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0); |
189 | 236 | ||
190 | /* Ensure size for copy-substituted snapshots (minus #0 and loop snapshot). */ | 237 | /* Grow snapshot buffer and map for copy-substituted snapshots. |
191 | nsnap = J->cur.nsnap; | 238 | ** Need up to twice the number of snapshots minus #0 and loop snapshot. |
192 | if (LJ_UNLIKELY(2*nsnap-2 > J->sizesnap)) { | 239 | ** Need up to twice the number of entries plus fallback substitutions |
193 | MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; | 240 | ** from the loop snapshot entries for each new snapshot. |
194 | if (2*nsnap-2 > maxsnap) | 241 | ** Caveat: both calls may reallocate J->cur.snap and J->cur.snapmap! |
195 | lj_trace_err(J, LJ_TRERR_SNAPOV); | 242 | */ |
196 | lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); | 243 | { |
197 | J->cur.snap = J->snapbuf; | 244 | MSize nsnap = J->cur.nsnap; |
198 | } | 245 | SnapShot *loopsnap; |
199 | nsnapmap = J->cur.nsnapmap; /* Use temp. copy to avoid undo. */ | 246 | lj_snap_grow_buf(J, 2*nsnap-2); |
200 | if (LJ_UNLIKELY(nsnapmap*2 > J->sizesnapmap)) { | 247 | lj_snap_grow_map(J, J->cur.nsnapmap*2+(nsnap-2)*J->cur.snap[nsnap-1].nent); |
201 | J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf, | ||
202 | J->sizesnapmap*sizeof(SnapEntry), | ||
203 | 2*J->sizesnapmap*sizeof(SnapEntry)); | ||
204 | J->cur.snapmap = J->snapmapbuf; | ||
205 | J->sizesnapmap *= 2; | ||
206 | } | ||
207 | 248 | ||
208 | /* The loop snapshot is used for fallback substitutions. */ | 249 | /* The loop snapshot is used for fallback substitutions. */ |
209 | snap = &J->cur.snap[nsnap-1]; | 250 | loopsnap = &J->cur.snap[nsnap-1]; |
210 | loopmap = &J->cur.snapmap[snap->mapofs]; | 251 | loopmap = &J->cur.snapmap[loopsnap->mapofs]; |
211 | loopslots = snap->nslots; | 252 | /* The PC of snapshot #0 and the loop snapshot must match. */ |
212 | /* The PC of snapshot #0 and the loop snapshot must match. */ | 253 | psentinel = &loopmap[loopsnap->nent]; |
213 | lua_assert(loopmap[loopslots] == J->cur.snapmap[J->cur.snap[0].nslots]); | 254 | lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]); |
255 | *psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */ | ||
256 | } | ||
214 | 257 | ||
215 | /* Start substitution with snapshot #1 (#0 is empty for root traces). */ | 258 | /* Start substitution with snapshot #1 (#0 is empty for root traces). */ |
216 | osnap = &J->cur.snap[1]; | 259 | osnap = &J->cur.snap[1]; |
217 | osnapref = osnap->ref; | ||
218 | 260 | ||
219 | /* Copy and substitute all recorded instructions and snapshots. */ | 261 | /* Copy and substitute all recorded instructions and snapshots. */ |
220 | for (ins = REF_FIRST; ins < invar; ins++) { | 262 | for (ins = REF_FIRST; ins < invar; ins++) { |
221 | IRIns *ir; | 263 | IRIns *ir; |
222 | IRRef op1, op2; | 264 | IRRef op1, op2; |
223 | 265 | ||
224 | /* Copy-substitute snapshot. */ | 266 | if (ins >= osnap->ref) /* Instruction belongs to next snapshot? */ |
225 | if (ins >= osnapref) { | 267 | loop_subst_snap(J, osnap++, loopmap, subst); /* Copy-substitute it. */ |
226 | SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs]; | ||
227 | BCReg s, nslots; | ||
228 | uint32_t nmapofs, nframelinks; | ||
229 | if (irt_isguard(J->guardemit)) { /* Guard inbetween? */ | ||
230 | nmapofs = nsnapmap; | ||
231 | snap++; /* Add new snapshot. */ | ||
232 | } else { | ||
233 | nmapofs = snap->mapofs; /* Overwrite previous snapshot. */ | ||
234 | } | ||
235 | J->guardemit.irt = 0; | ||
236 | nslots = osnap->nslots; | ||
237 | nframelinks = osnap->nframelinks; | ||
238 | snap->mapofs = (uint16_t)nmapofs; | ||
239 | snap->ref = (IRRef1)J->cur.nins; | ||
240 | snap->nslots = (uint8_t)nslots; | ||
241 | snap->nframelinks = (uint8_t)nframelinks; | ||
242 | snap->count = 0; | ||
243 | osnap++; | ||
244 | osnapref = osnap->ref; | ||
245 | nsnapmap = nmapofs + nslots + nframelinks; | ||
246 | nmap = &J->cur.snapmap[nmapofs]; | ||
247 | /* Substitute snapshot slots. */ | ||
248 | for (s = 0; s < nslots; s++) { | ||
249 | IRRef ref = snap_ref(omap[s]); | ||
250 | if (ref) { | ||
251 | if (!irref_isk(ref)) | ||
252 | ref = subst[ref]; | ||
253 | } else if (s < loopslots) { | ||
254 | ref = loopmap[s]; | ||
255 | } | ||
256 | nmap[s] = ref; | ||
257 | } | ||
258 | /* Copy frame links. */ | ||
259 | nmap += nslots; | ||
260 | omap += nslots; | ||
261 | for (s = 0; s < nframelinks; s++) | ||
262 | nmap[s] = omap[s]; | ||
263 | } | ||
264 | 268 | ||
265 | /* Substitute instruction operands. */ | 269 | /* Substitute instruction operands. */ |
266 | ir = IR(ins); | 270 | ir = IR(ins); |
@@ -295,22 +299,24 @@ static void loop_unroll(jit_State *J) | |||
295 | } | 299 | } |
296 | } | 300 | } |
297 | } | 301 | } |
298 | if (irt_isguard(J->guardemit)) { /* Guard inbetween? */ | 302 | if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */ |
299 | J->cur.nsnapmap = (uint16_t)nsnapmap; | 303 | J->cur.nsnapmap = (uint16_t)J->cur.snap[--J->cur.nsnap].mapofs; |
300 | snap++; | ||
301 | } else { | ||
302 | J->cur.nsnapmap = (uint16_t)snap->mapofs; /* Last snapshot is redundant. */ | ||
303 | } | ||
304 | J->cur.nsnap = (uint16_t)(snap - J->cur.snap); | ||
305 | lua_assert(J->cur.nsnapmap <= J->sizesnapmap); | 304 | lua_assert(J->cur.nsnapmap <= J->sizesnapmap); |
305 | *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */ | ||
306 | 306 | ||
307 | loop_emit_phi(J, subst, phi, nphi); | 307 | loop_emit_phi(J, subst, phi, nphi); |
308 | } | 308 | } |
309 | 309 | ||
310 | /* Undo any partial changes made by the loop optimization. */ | 310 | /* Undo any partial changes made by the loop optimization. */ |
311 | static void loop_undo(jit_State *J, IRRef ins) | 311 | static void loop_undo(jit_State *J, IRRef ins, MSize nsnap) |
312 | { | 312 | { |
313 | ptrdiff_t i; | 313 | ptrdiff_t i; |
314 | SnapShot *snap = &J->cur.snap[nsnap-1]; | ||
315 | SnapEntry *map = J->cur.snapmap; | ||
316 | map[snap->mapofs + snap->nent] = map[J->cur.snap[0].nent]; /* Restore PC. */ | ||
317 | J->cur.nsnapmap = (uint16_t)(snap->mapofs + snap->nent + snap->nframelinks); | ||
318 | J->cur.nsnap = nsnap; | ||
319 | J->guardemit.irt = 0; | ||
314 | lj_ir_rollback(J, ins); | 320 | lj_ir_rollback(J, ins); |
315 | for (i = 0; i < BPROP_SLOTS; i++) { /* Remove backprop. cache entries. */ | 321 | for (i = 0; i < BPROP_SLOTS; i++) { /* Remove backprop. cache entries. */ |
316 | BPropEntry *bp = &J->bpropcache[i]; | 322 | BPropEntry *bp = &J->bpropcache[i]; |
@@ -336,6 +342,7 @@ static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud) | |||
336 | int lj_opt_loop(jit_State *J) | 342 | int lj_opt_loop(jit_State *J) |
337 | { | 343 | { |
338 | IRRef nins = J->cur.nins; | 344 | IRRef nins = J->cur.nins; |
345 | MSize nsnap = J->cur.nsnap; | ||
339 | int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); | 346 | int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); |
340 | if (LJ_UNLIKELY(errcode)) { | 347 | if (LJ_UNLIKELY(errcode)) { |
341 | lua_State *L = J->L; | 348 | lua_State *L = J->L; |
@@ -348,8 +355,7 @@ int lj_opt_loop(jit_State *J) | |||
348 | if (--J->instunroll < 0) /* But do not unroll forever. */ | 355 | if (--J->instunroll < 0) /* But do not unroll forever. */ |
349 | break; | 356 | break; |
350 | L->top--; /* Remove error object. */ | 357 | L->top--; /* Remove error object. */ |
351 | J->guardemit.irt = 0; | 358 | loop_undo(J, nins, nsnap); |
352 | loop_undo(J, nins); | ||
353 | return 1; /* Loop optimization failed, continue recording. */ | 359 | return 1; /* Loop optimization failed, continue recording. */ |
354 | default: | 360 | default: |
355 | break; | 361 | break; |
diff --git a/src/lj_record.c b/src/lj_record.c index 6af25ccb..3f442088 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
@@ -1696,7 +1696,7 @@ static void optstate_comp(jit_State *J, int cond) | |||
1696 | const BCIns *npc = J->pc + 2 + (cond ? bc_j(jmpins) : 0); | 1696 | const BCIns *npc = J->pc + 2 + (cond ? bc_j(jmpins) : 0); |
1697 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; | 1697 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; |
1698 | /* Avoid re-recording the comparison in side traces. */ | 1698 | /* Avoid re-recording the comparison in side traces. */ |
1699 | J->cur.snapmap[snap->mapofs + snap->nslots] = u32ptr(npc); | 1699 | J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); |
1700 | J->needsnap = 1; | 1700 | J->needsnap = 1; |
1701 | /* Shrink last snapshot if possible. */ | 1701 | /* Shrink last snapshot if possible. */ |
1702 | if (bc_a(jmpins) < J->maxslot) { | 1702 | if (bc_a(jmpins) < J->maxslot) { |
@@ -2159,61 +2159,62 @@ static void rec_setup_side(jit_State *J, Trace *T) | |||
2159 | { | 2159 | { |
2160 | SnapShot *snap = &T->snap[J->exitno]; | 2160 | SnapShot *snap = &T->snap[J->exitno]; |
2161 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 2161 | SnapEntry *map = &T->snapmap[snap->mapofs]; |
2162 | BCReg s, nslots = snap->nslots; | 2162 | MSize n, nent = snap->nent; |
2163 | BloomFilter seen = 0; | 2163 | BloomFilter seen = 0; |
2164 | for (s = 0; s < nslots; s++) { | 2164 | /* Emit IR for slots inherited from parent snapshot. */ |
2165 | IRRef ref = snap_ref(map[s]); | 2165 | for (n = 0; n < nent; n++) { |
2166 | if (ref) { | 2166 | IRRef ref = snap_ref(map[n]); |
2167 | IRIns *ir = &T->ir[ref]; | 2167 | BCReg s = snap_slot(map[n]); |
2168 | TRef tr = 0; | 2168 | IRIns *ir = &T->ir[ref]; |
2169 | /* The bloom filter avoids O(nslots^2) overhead for de-duping slots. */ | 2169 | TRef tr; |
2170 | if (bloomtest(seen, ref)) { | 2170 | /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */ |
2171 | BCReg j; | 2171 | if (bloomtest(seen, ref)) { |
2172 | for (j = 0; j < s; j++) | 2172 | MSize j; |
2173 | if (snap_ref(map[j]) == ref) { | 2173 | for (j = 0; j < n; j++) |
2174 | if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { | 2174 | if (snap_ref(map[j]) == ref) { |
2175 | lua_assert(s != 0); | 2175 | tr = J->slot[snap_slot(map[j])]; |
2176 | J->baseslot = s+1; | 2176 | if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { |
2177 | J->framedepth++; | 2177 | lua_assert(s != 0); |
2178 | } | ||
2179 | tr = J->slot[j]; | ||
2180 | goto dupslot; | ||
2181 | } | ||
2182 | } | ||
2183 | bloomset(seen, ref); | ||
2184 | switch ((IROp)ir->o) { | ||
2185 | case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break; | ||
2186 | case IR_KINT: tr = lj_ir_kint(J, ir->i); break; | ||
2187 | case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break; | ||
2188 | case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break; | ||
2189 | case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */ | ||
2190 | if (irt_isfunc(ir->t)) { | ||
2191 | if (s != 0) { | ||
2192 | J->baseslot = s+1; | 2178 | J->baseslot = s+1; |
2193 | J->framedepth++; | 2179 | J->framedepth++; |
2194 | } | 2180 | } |
2195 | tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2])); | 2181 | goto dupslot; |
2196 | tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr); | ||
2197 | } else { | ||
2198 | tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void)); | ||
2199 | tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr); | ||
2200 | } | 2182 | } |
2201 | break; | 2183 | } |
2202 | case IR_SLOAD: /* Inherited SLOADs don't need a guard or type check. */ | 2184 | bloomset(seen, ref); |
2203 | tr = emitir_raw(ir->ot & ~IRT_GUARD, s, | 2185 | switch ((IROp)ir->o) { |
2204 | (ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT); | 2186 | /* Only have to deal with constants that can occur in stack slots. */ |
2205 | break; | 2187 | case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break; |
2206 | default: /* Parent refs are already typed and don't need a guard. */ | 2188 | case IR_KINT: tr = lj_ir_kint(J, ir->i); break; |
2207 | tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s, | 2189 | case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break; |
2208 | IRSLOAD_INHERIT|IRSLOAD_PARENT); | 2190 | case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break; |
2209 | break; | 2191 | case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */ |
2192 | if (irt_isfunc(ir->t)) { | ||
2193 | if (s != 0) { | ||
2194 | J->baseslot = s+1; | ||
2195 | J->framedepth++; | ||
2196 | } | ||
2197 | tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2])); | ||
2198 | tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr); | ||
2199 | } else { | ||
2200 | tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void)); | ||
2201 | tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr); | ||
2210 | } | 2202 | } |
2211 | dupslot: | 2203 | break; |
2212 | J->slot[s] = tr; | 2204 | case IR_SLOAD: /* Inherited SLOADs don't need a guard or type check. */ |
2205 | tr = emitir_raw(ir->ot & ~IRT_GUARD, s, | ||
2206 | (ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT); | ||
2207 | break; | ||
2208 | default: /* Parent refs are already typed and don't need a guard. */ | ||
2209 | tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s, | ||
2210 | IRSLOAD_INHERIT|IRSLOAD_PARENT); | ||
2211 | break; | ||
2213 | } | 2212 | } |
2213 | dupslot: | ||
2214 | J->slot[s] = tr; | ||
2214 | } | 2215 | } |
2215 | J->base = J->slot + J->baseslot; | 2216 | J->base = J->slot + J->baseslot; |
2216 | J->maxslot = nslots - J->baseslot; | 2217 | J->maxslot = snap->nslots - J->baseslot; |
2217 | lj_snap_add(J); | 2218 | lj_snap_add(J); |
2218 | } | 2219 | } |
2219 | 2220 | ||
@@ -2259,7 +2260,7 @@ void lj_record_setup(jit_State *J) | |||
2259 | J->cur.root = (uint16_t)root; | 2260 | J->cur.root = (uint16_t)root; |
2260 | J->cur.startins = BCINS_AD(BC_JMP, 0, 0); | 2261 | J->cur.startins = BCINS_AD(BC_JMP, 0, 0); |
2261 | /* Check whether we could at least potentially form an extra loop. */ | 2262 | /* Check whether we could at least potentially form an extra loop. */ |
2262 | if (J->exitno == 0 && T->snap[0].nslots == 1 && T->snapmap[0] == 0) { | 2263 | if (J->exitno == 0 && T->snap[0].nent == 0) { |
2263 | /* We can narrow a FORL for some side traces, too. */ | 2264 | /* We can narrow a FORL for some side traces, too. */ |
2264 | if (J->pc > J->pt->bc && bc_op(J->pc[-1]) == BC_JFORI && | 2265 | if (J->pc > J->pt->bc && bc_op(J->pc[-1]) == BC_JFORI && |
2265 | bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) { | 2266 | bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) { |
diff --git a/src/lj_snap.c b/src/lj_snap.c index f262e1c9..d22c90a4 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c | |||
@@ -23,28 +23,50 @@ | |||
23 | /* Some local macros to save typing. Undef'd at the end. */ | 23 | /* Some local macros to save typing. Undef'd at the end. */ |
24 | #define IR(ref) (&J->cur.ir[(ref)]) | 24 | #define IR(ref) (&J->cur.ir[(ref)]) |
25 | 25 | ||
26 | /* -- Snapshot buffer allocation ------------------------------------------ */ | ||
27 | |||
28 | /* Grow snapshot buffer. */ | ||
29 | void lj_snap_grow_buf_(jit_State *J, MSize need) | ||
30 | { | ||
31 | MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; | ||
32 | if (need > maxsnap) | ||
33 | lj_trace_err(J, LJ_TRERR_SNAPOV); | ||
34 | lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); | ||
35 | J->cur.snap = J->snapbuf; | ||
36 | } | ||
37 | |||
38 | /* Grow snapshot map buffer. */ | ||
39 | void lj_snap_grow_map_(jit_State *J, MSize need) | ||
40 | { | ||
41 | if (need < 2*J->sizesnapmap) | ||
42 | need = 2*J->sizesnapmap; | ||
43 | else if (need < 64) | ||
44 | need = 64; | ||
45 | J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf, | ||
46 | J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry)); | ||
47 | J->cur.snapmap = J->snapmapbuf; | ||
48 | J->sizesnapmap = need; | ||
49 | } | ||
50 | |||
26 | /* -- Snapshot generation ------------------------------------------------- */ | 51 | /* -- Snapshot generation ------------------------------------------------- */ |
27 | 52 | ||
28 | /* NYI: Snapshots are in need of a redesign. The current storage model for | 53 | /* NYI: IR_FRAME should be eliminated, too. */ |
29 | ** snapshot maps is too wasteful. They could be compressed (1D or 2D) and | ||
30 | ** made more flexible at the same time. Iterators should no longer need to | ||
31 | ** skip unmodified slots. IR_FRAME should be eliminated, too. | ||
32 | */ | ||
33 | 54 | ||
34 | /* Add all modified slots to the snapshot. */ | 55 | /* Add all modified slots to the snapshot. */ |
35 | static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) | 56 | static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) |
36 | { | 57 | { |
37 | BCReg s; | 58 | BCReg s; |
59 | MSize n = 0; | ||
38 | for (s = 0; s < nslots; s++) { | 60 | for (s = 0; s < nslots; s++) { |
39 | IRRef ref = tref_ref(J->slot[s]); | 61 | IRRef ref = tref_ref(J->slot[s]); |
40 | if (ref) { | 62 | if (ref) { |
41 | IRIns *ir = IR(ref); | 63 | IRIns *ir = IR(ref); |
42 | if (ir->o == IR_SLOAD && ir->op1 == s && !(ir->op2 & IRSLOAD_INHERIT)) | 64 | if (!(ir->o == IR_SLOAD && ir->op1 == s && |
43 | ref = 0; | 65 | !(ir->op2 & IRSLOAD_INHERIT))) |
66 | map[n++] = SNAP(s, ir->o == IR_FRAME ? SNAP_FRAME : 0, ref); | ||
44 | } | 67 | } |
45 | map[s] = (SnapEntry)ref; | ||
46 | } | 68 | } |
47 | return nslots; | 69 | return n; |
48 | } | 70 | } |
49 | 71 | ||
50 | /* Add frame links at the end of the snapshot. */ | 72 | /* Add frame links at the end of the snapshot. */ |
@@ -53,17 +75,17 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map) | |||
53 | cTValue *frame = J->L->base - 1; | 75 | cTValue *frame = J->L->base - 1; |
54 | cTValue *lim = J->L->base - J->baseslot; | 76 | cTValue *lim = J->L->base - J->baseslot; |
55 | MSize f = 0; | 77 | MSize f = 0; |
56 | map[f++] = u32ptr(J->pc); | 78 | map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ |
57 | while (frame > lim) { | 79 | while (frame > lim) { /* Backwards traversal of all frames above base. */ |
58 | if (frame_islua(frame)) { | 80 | if (frame_islua(frame)) { |
59 | map[f++] = u32ptr(frame_pc(frame)); | 81 | map[f++] = SNAP_MKPC(frame_pc(frame)); |
60 | frame = frame_prevl(frame); | 82 | frame = frame_prevl(frame); |
61 | } else if (frame_ispcall(frame)) { | 83 | } else if (frame_ispcall(frame)) { |
62 | map[f++] = (uint32_t)frame_ftsz(frame); | 84 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); |
63 | frame = frame_prevd(frame); | 85 | frame = frame_prevd(frame); |
64 | } else if (frame_iscont(frame)) { | 86 | } else if (frame_iscont(frame)) { |
65 | map[f++] = (uint32_t)frame_ftsz(frame); | 87 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); |
66 | map[f++] = u32ptr(frame_contpc(frame)); | 88 | map[f++] = SNAP_MKPC(frame_contpc(frame)); |
67 | frame = frame_prevd(frame); | 89 | frame = frame_prevd(frame); |
68 | } else { | 90 | } else { |
69 | lua_assert(0); | 91 | lua_assert(0); |
@@ -76,28 +98,19 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map) | |||
76 | static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) | 98 | static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) |
77 | { | 99 | { |
78 | BCReg nslots = J->baseslot + J->maxslot; | 100 | BCReg nslots = J->baseslot + J->maxslot; |
79 | MSize nsm, nframelinks; | 101 | MSize nent, nframelinks; |
80 | SnapEntry *p; | 102 | SnapEntry *p; |
81 | /* Conservative estimate. Continuation frames need 2 slots. */ | 103 | /* Conservative estimate. Continuation frames need 2 slots. */ |
82 | nsm = nsnapmap + nslots + (uint32_t)J->framedepth*2+1; | 104 | lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth*2+1); |
83 | if (LJ_UNLIKELY(nsm > J->sizesnapmap)) { /* Need to grow snapshot map? */ | ||
84 | if (nsm < 2*J->sizesnapmap) | ||
85 | nsm = 2*J->sizesnapmap; | ||
86 | else if (nsm < 64) | ||
87 | nsm = 64; | ||
88 | J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf, | ||
89 | J->sizesnapmap*sizeof(SnapEntry), nsm*sizeof(SnapEntry)); | ||
90 | J->cur.snapmap = J->snapmapbuf; | ||
91 | J->sizesnapmap = nsm; | ||
92 | } | ||
93 | p = &J->cur.snapmap[nsnapmap]; | 105 | p = &J->cur.snapmap[nsnapmap]; |
94 | nslots = snapshot_slots(J, p, nslots); | 106 | nent = snapshot_slots(J, p, nslots); |
95 | nframelinks = snapshot_framelinks(J, p + nslots); | 107 | nframelinks = snapshot_framelinks(J, p + nent); |
96 | J->cur.nsnapmap = (uint16_t)(nsnapmap + nslots + nframelinks); | 108 | J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + nframelinks); |
97 | snap->mapofs = (uint16_t)nsnapmap; | 109 | snap->mapofs = (uint16_t)nsnapmap; |
98 | snap->ref = (IRRef1)J->cur.nins; | 110 | snap->ref = (IRRef1)J->cur.nins; |
99 | snap->nslots = (uint8_t)nslots; | 111 | snap->nent = (uint8_t)nent; |
100 | snap->nframelinks = (uint8_t)nframelinks; | 112 | snap->nframelinks = (uint8_t)nframelinks; |
113 | snap->nslots = (uint8_t)nslots; | ||
101 | snap->count = 0; | 114 | snap->count = 0; |
102 | } | 115 | } |
103 | 116 | ||
@@ -111,14 +124,7 @@ void lj_snap_add(jit_State *J) | |||
111 | (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { | 124 | (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { |
112 | nsnapmap = J->cur.snap[--nsnap].mapofs; | 125 | nsnapmap = J->cur.snap[--nsnap].mapofs; |
113 | } else { | 126 | } else { |
114 | /* Need to grow snapshot buffer? */ | 127 | lj_snap_grow_buf(J, nsnap+1); |
115 | if (LJ_UNLIKELY(nsnap >= J->sizesnap)) { | ||
116 | MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; | ||
117 | if (nsnap >= maxsnap) | ||
118 | lj_trace_err(J, LJ_TRERR_SNAPOV); | ||
119 | lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); | ||
120 | J->cur.snap = J->snapbuf; | ||
121 | } | ||
122 | J->cur.nsnap = (uint16_t)(nsnap+1); | 128 | J->cur.nsnap = (uint16_t)(nsnap+1); |
123 | } | 129 | } |
124 | J->mergesnap = 0; | 130 | J->mergesnap = 0; |
@@ -131,14 +137,21 @@ void lj_snap_shrink(jit_State *J) | |||
131 | { | 137 | { |
132 | BCReg nslots = J->baseslot + J->maxslot; | 138 | BCReg nslots = J->baseslot + J->maxslot; |
133 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; | 139 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; |
134 | SnapEntry *oflinks = &J->cur.snapmap[snap->mapofs + snap->nslots]; | 140 | SnapEntry *map = &J->cur.snapmap[snap->mapofs]; |
135 | SnapEntry *nflinks = &J->cur.snapmap[snap->mapofs + nslots]; | 141 | MSize nent = snap->nent; |
136 | uint32_t s, nframelinks = snap->nframelinks; | ||
137 | lua_assert(nslots < snap->nslots); | 142 | lua_assert(nslots < snap->nslots); |
138 | snap->nslots = (uint8_t)nslots; | 143 | snap->nslots = (uint8_t)nslots; |
139 | J->cur.nsnapmap = (uint16_t)(snap->mapofs + nslots + nframelinks); | 144 | if (nent > 0 && snap_slot(map[nent-1]) >= nslots) { |
140 | for (s = 0; s < nframelinks; s++) /* Move frame links down. */ | 145 | MSize s, delta, nframelinks = snap->nframelinks; |
141 | nflinks[s] = oflinks[s]; | 146 | for (nent--; nent > 0 && snap_slot(map[nent-1]) >= nslots; nent--) |
147 | ; | ||
148 | delta = snap->nent - nent; | ||
149 | snap->nent = (uint8_t)nent; | ||
150 | J->cur.nsnapmap = (uint16_t)(snap->mapofs + nent + nframelinks); | ||
151 | map += nent; | ||
152 | for (s = 0; s < nframelinks; s++) /* Move frame links down. */ | ||
153 | map[s] = map[s+delta]; | ||
154 | } | ||
142 | } | 155 | } |
143 | 156 | ||
144 | /* -- Snapshot access ----------------------------------------------------- */ | 157 | /* -- Snapshot access ----------------------------------------------------- */ |
@@ -167,21 +180,24 @@ static RegSP snap_renameref(Trace *T, SnapNo lim, IRRef ref, RegSP rs) | |||
167 | return rs; | 180 | return rs; |
168 | } | 181 | } |
169 | 182 | ||
170 | /* Convert a snapshot into a linear slot -> RegSP map. */ | 183 | /* Convert a snapshot into a linear slot -> RegSP map. |
184 | ** Note: unused slots are not initialized! | ||
185 | */ | ||
171 | void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno) | 186 | void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno) |
172 | { | 187 | { |
173 | SnapShot *snap = &T->snap[snapno]; | 188 | SnapShot *snap = &T->snap[snapno]; |
174 | BCReg s, nslots = snap->nslots; | 189 | MSize n, nent = snap->nent; |
175 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 190 | SnapEntry *map = &T->snapmap[snap->mapofs]; |
176 | BloomFilter rfilt = snap_renamefilter(T, snapno); | 191 | BloomFilter rfilt = snap_renamefilter(T, snapno); |
177 | for (s = 0; s < nslots; s++) { | 192 | for (n = 0; n < nent; n++) { |
178 | IRRef ref = snap_ref(map[s]); | 193 | SnapEntry sn = map[n]; |
194 | IRRef ref = snap_ref(sn); | ||
179 | if (!irref_isk(ref)) { | 195 | if (!irref_isk(ref)) { |
180 | IRIns *ir = &T->ir[ref]; | 196 | IRIns *ir = &T->ir[ref]; |
181 | uint32_t rs = ir->prev; | 197 | uint32_t rs = ir->prev; |
182 | if (bloomtest(rfilt, ref)) | 198 | if (bloomtest(rfilt, ref)) |
183 | rs = snap_renameref(T, snapno, ref, rs); | 199 | rs = snap_renameref(T, snapno, ref, rs); |
184 | rsmap[s] = (uint16_t)rs; | 200 | rsmap[snap_slot(sn)] = (uint16_t)rs; |
185 | } | 201 | } |
186 | } | 202 | } |
187 | } | 203 | } |
@@ -193,89 +209,88 @@ void lj_snap_restore(jit_State *J, void *exptr) | |||
193 | SnapNo snapno = J->exitno; /* For now, snapno == exitno. */ | 209 | SnapNo snapno = J->exitno; /* For now, snapno == exitno. */ |
194 | Trace *T = J->trace[J->parent]; | 210 | Trace *T = J->trace[J->parent]; |
195 | SnapShot *snap = &T->snap[snapno]; | 211 | SnapShot *snap = &T->snap[snapno]; |
196 | BCReg s, nslots = snap->nslots; | 212 | MSize n, nent = snap->nent; |
197 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 213 | SnapEntry *map = &T->snapmap[snap->mapofs]; |
198 | SnapEntry *flinks = map + nslots + snap->nframelinks; | 214 | SnapEntry *flinks = map + nent + snap->nframelinks; |
199 | TValue *o, *newbase, *ntop; | 215 | BCReg nslots = snap->nslots; |
216 | TValue *frame; | ||
200 | BloomFilter rfilt = snap_renamefilter(T, snapno); | 217 | BloomFilter rfilt = snap_renamefilter(T, snapno); |
201 | lua_State *L = J->L; | 218 | lua_State *L = J->L; |
202 | 219 | ||
203 | /* Make sure the stack is big enough for the slots from the snapshot. */ | 220 | /* Make sure the stack is big enough for the slots from the snapshot. */ |
204 | if (L->base + nslots >= L->maxstack) { | 221 | if (LJ_UNLIKELY(L->base + nslots > L->maxstack)) { |
205 | L->top = curr_topL(L); | 222 | L->top = curr_topL(L); |
206 | lj_state_growstack(L, nslots - curr_proto(L)->framesize); | 223 | lj_state_growstack(L, nslots - curr_proto(L)->framesize); |
207 | } | 224 | } |
208 | 225 | ||
209 | /* Fill stack slots with data from the registers and spill slots. */ | 226 | /* Fill stack slots with data from the registers and spill slots. */ |
210 | newbase = NULL; | 227 | frame = L->base-1; |
211 | ntop = L->base; | 228 | for (n = 0; n < nent; n++) { |
212 | for (s = 0, o = L->base-1; s < nslots; s++, o++) { | 229 | IRRef ref = snap_ref(map[n]); |
213 | IRRef ref = snap_ref(map[s]); | 230 | BCReg s = snap_slot(map[n]); |
214 | if (ref) { | 231 | TValue *o = &frame[s]; /* Stack slots are relative to start frame. */ |
215 | IRIns *ir = &T->ir[ref]; | 232 | IRIns *ir = &T->ir[ref]; |
216 | if (irref_isk(ref)) { /* Restore constant slot. */ | 233 | if (irref_isk(ref)) { /* Restore constant slot. */ |
217 | lj_ir_kvalue(L, o, ir); | 234 | lj_ir_kvalue(L, o, ir); |
218 | } else { | 235 | } else { |
219 | IRType1 t = ir->t; | 236 | IRType1 t = ir->t; |
220 | RegSP rs = ir->prev; | 237 | RegSP rs = ir->prev; |
221 | if (LJ_UNLIKELY(bloomtest(rfilt, ref))) | 238 | if (LJ_UNLIKELY(bloomtest(rfilt, ref))) |
222 | rs = snap_renameref(T, snapno, ref, rs); | 239 | rs = snap_renameref(T, snapno, ref, rs); |
223 | if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ | 240 | if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ |
224 | int32_t *sps = &ex->spill[regsp_spill(rs)]; | 241 | int32_t *sps = &ex->spill[regsp_spill(rs)]; |
225 | if (irt_isinteger(t)) { | 242 | if (irt_isinteger(t)) { |
226 | setintV(o, *sps); | 243 | setintV(o, *sps); |
227 | } else if (irt_isnum(t)) { | 244 | } else if (irt_isnum(t)) { |
228 | o->u64 = *(uint64_t *)sps; | 245 | o->u64 = *(uint64_t *)sps; |
229 | } else { | 246 | } else { |
230 | lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ | 247 | lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ |
231 | setgcrefi(o->gcr, *sps); | 248 | setgcrefi(o->gcr, *sps); |
232 | setitype(o, irt_toitype(t)); | 249 | setitype(o, irt_toitype(t)); |
233 | } | 250 | } |
234 | } else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */ | 251 | } else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */ |
235 | Reg r = regsp_reg(rs); | 252 | Reg r = regsp_reg(rs); |
236 | if (irt_isinteger(t)) { | 253 | if (irt_isinteger(t)) { |
237 | setintV(o, ex->gpr[r-RID_MIN_GPR]); | 254 | setintV(o, ex->gpr[r-RID_MIN_GPR]); |
238 | } else if (irt_isnum(t)) { | 255 | } else if (irt_isnum(t)) { |
239 | setnumV(o, ex->fpr[r-RID_MIN_FPR]); | 256 | setnumV(o, ex->fpr[r-RID_MIN_FPR]); |
240 | } else { | 257 | } else { |
241 | if (!irt_ispri(t)) | 258 | if (!irt_ispri(t)) |
242 | setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); | 259 | setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); |
243 | setitype(o, irt_toitype(t)); | 260 | setitype(o, irt_toitype(t)); |
244 | } | 261 | } |
245 | } else { /* Restore frame slot. */ | 262 | } else { /* Restore frame slot. */ |
246 | lua_assert(ir->o == IR_FRAME); | 263 | lua_assert(ir->o == IR_FRAME); |
247 | /* This works for both PTR and FUNC IR_FRAME. */ | 264 | /* This works for both PTR and FUNC IR_FRAME. */ |
248 | setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void)); | 265 | setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void)); |
249 | if (s != 0) /* Do not overwrite link to previous frame. */ | 266 | if (s != 0) /* Do not overwrite link to previous frame. */ |
250 | o->fr.tp.ftsz = (int32_t)*--flinks; | 267 | o->fr.tp.ftsz = (int32_t)*--flinks; |
251 | if (irt_isfunc(ir->t)) { | 268 | if (irt_isfunc(ir->t)) { |
252 | GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr)); | 269 | GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr)); |
253 | if (isluafunc(fn)) { | 270 | if (isluafunc(fn)) { |
254 | TValue *fs; | 271 | MSize framesize = funcproto(fn)->framesize; |
255 | fs = o+1 + funcproto(fn)->framesize; | 272 | TValue *fs; |
256 | if (fs > ntop) ntop = fs; /* Update top for newly added frames. */ | 273 | L->base = ++o; |
257 | if (s != 0) newbase = o+1; | 274 | if (LJ_UNLIKELY(o + framesize > L->maxstack)) { /* Grow again? */ |
275 | ptrdiff_t fsave = savestack(L, frame); | ||
276 | L->top = o; | ||
277 | lj_state_growstack(L, framesize); | ||
278 | frame = restorestack(L, fsave); | ||
279 | o = L->top; | ||
258 | } | 280 | } |
281 | fs = o + framesize; | ||
282 | if (s == 0) /* Only partially clear tail call frame at #0. */ | ||
283 | o = &frame[nslots]; | ||
284 | while (o < fs) /* Clear slots of newly added frames. */ | ||
285 | setnilV(o++); | ||
259 | } | 286 | } |
260 | } | 287 | } |
261 | } | 288 | } |
262 | } else { | ||
263 | lua_assert(!newbase); | ||
264 | } | 289 | } |
265 | } | 290 | } |
266 | if (newbase) L->base = newbase; | ||
267 | if (ntop >= L->maxstack) { /* Need to grow the stack again. */ | ||
268 | MSize need = (MSize)(ntop - o); | ||
269 | L->top = o; | ||
270 | lj_state_growstack(L, need); | ||
271 | o = L->top; | ||
272 | ntop = o + need; | ||
273 | } | ||
274 | L->top = curr_topL(L); | 291 | L->top = curr_topL(L); |
275 | for (; o < ntop; o++) /* Clear remainder of newly added frames. */ | 292 | J->pc = snap_pc(*--flinks); |
276 | setnilV(o); | 293 | lua_assert(map + nent == flinks); |
277 | lua_assert(map + nslots == flinks-1); | ||
278 | J->pc = (const BCIns *)(uintptr_t)(*--flinks); | ||
279 | } | 294 | } |
280 | 295 | ||
281 | #undef IR | 296 | #undef IR |
diff --git a/src/lj_snap.h b/src/lj_snap.h index 776a0bcf..ed7d98a1 100644 --- a/src/lj_snap.h +++ b/src/lj_snap.h | |||
@@ -14,6 +14,19 @@ LJ_FUNC void lj_snap_add(jit_State *J); | |||
14 | LJ_FUNC void lj_snap_shrink(jit_State *J); | 14 | LJ_FUNC void lj_snap_shrink(jit_State *J); |
15 | LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno); | 15 | LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno); |
16 | LJ_FUNC void lj_snap_restore(jit_State *J, void *exptr); | 16 | LJ_FUNC void lj_snap_restore(jit_State *J, void *exptr); |
17 | LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need); | ||
18 | LJ_FUNC void lj_snap_grow_map_(jit_State *J, MSize need); | ||
19 | |||
20 | static LJ_AINLINE void lj_snap_grow_buf(jit_State *J, MSize need) | ||
21 | { | ||
22 | if (LJ_UNLIKELY(need > J->sizesnap)) lj_snap_grow_buf_(J, need); | ||
23 | } | ||
24 | |||
25 | static LJ_AINLINE void lj_snap_grow_map(jit_State *J, MSize need) | ||
26 | { | ||
27 | if (LJ_UNLIKELY(need > J->sizesnapmap)) lj_snap_grow_map_(J, need); | ||
28 | } | ||
29 | |||
17 | #endif | 30 | #endif |
18 | 31 | ||
19 | #endif | 32 | #endif |
diff --git a/src/lj_trace.c b/src/lj_trace.c index a5468655..fb36c7ee 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c | |||
@@ -161,8 +161,8 @@ void lj_trace_reenableproto(GCproto *pt) | |||
161 | static void trace_unpatch(jit_State *J, Trace *T) | 161 | static void trace_unpatch(jit_State *J, Trace *T) |
162 | { | 162 | { |
163 | BCOp op = bc_op(T->startins); | 163 | BCOp op = bc_op(T->startins); |
164 | uint32_t pcofs = T->snap[0].mapofs + T->snap[0].nslots; | 164 | MSize pcofs = T->snap[0].mapofs + T->snap[0].nent; |
165 | BCIns *pc = ((BCIns *)(uintptr_t)T->snapmap[pcofs]) - 1; | 165 | BCIns *pc = ((BCIns *)snap_pc(T->snapmap[pcofs])) - 1; |
166 | switch (op) { | 166 | switch (op) { |
167 | case BC_FORL: | 167 | case BC_FORL: |
168 | lua_assert(bc_op(*pc) == BC_JFORI); | 168 | lua_assert(bc_op(*pc) == BC_JFORI); |
@@ -352,7 +352,6 @@ static void trace_start(jit_State *J) | |||
352 | J->cur.ir = J->irbuf; | 352 | J->cur.ir = J->irbuf; |
353 | J->cur.snap = J->snapbuf; | 353 | J->cur.snap = J->snapbuf; |
354 | J->cur.snapmap = J->snapmapbuf; | 354 | J->cur.snapmap = J->snapmapbuf; |
355 | /* J->cur.nsnapmap = 0; */ | ||
356 | J->mergesnap = 0; | 355 | J->mergesnap = 0; |
357 | J->needsnap = 0; | 356 | J->needsnap = 0; |
358 | J->guardemit.irt = 0; | 357 | J->guardemit.irt = 0; |