diff options
| author | Mike Pall <mike> | 2010-01-26 21:49:04 +0100 |
|---|---|---|
| committer | Mike Pall <mike> | 2010-01-26 21:49:04 +0100 |
| commit | 67ca399a30cec05acacd7ea33d5cb0e361f92755 (patch) | |
| tree | 335806ea53e2f98a71eb2395baa1d3d7bea352ae | |
| parent | e058714a2e3745a819b77e6b50551e423897026a (diff) | |
| download | luajit-67ca399a30cec05acacd7ea33d5cb0e361f92755.tar.gz luajit-67ca399a30cec05acacd7ea33d5cb0e361f92755.tar.bz2 luajit-67ca399a30cec05acacd7ea33d5cb0e361f92755.zip | |
Compress snapshots using a simple, extensible 1D-compression.
Typically reduces storage overhead for snapshot maps by 60%.
The extensible format is a prerequisite for the next redesign steps:
Eliminate IR_FRAME and implement return-to-lower-frame.
| -rw-r--r-- | src/Makefile.dep | 6 | ||||
| -rw-r--r-- | src/lib_jit.c | 21 | ||||
| -rw-r--r-- | src/lj_asm.c | 101 | ||||
| -rw-r--r-- | src/lj_gdbjit.c | 4 | ||||
| -rw-r--r-- | src/lj_jit.h | 16 | ||||
| -rw-r--r-- | src/lj_opt_dce.c | 6 | ||||
| -rw-r--r-- | src/lj_opt_loop.c | 168 | ||||
| -rw-r--r-- | src/lj_record.c | 97 | ||||
| -rw-r--r-- | src/lj_snap.c | 247 | ||||
| -rw-r--r-- | src/lj_snap.h | 13 | ||||
| -rw-r--r-- | src/lj_trace.c | 5 |
11 files changed, 365 insertions, 319 deletions
diff --git a/src/Makefile.dep b/src/Makefile.dep index 779ee545..ffb7d79b 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep | |||
| @@ -11,7 +11,7 @@ buildvm_lib.o: buildvm_lib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | |||
| 11 | buildvm_peobj.o: buildvm_peobj.c buildvm.h lj_def.h lua.h luaconf.h \ | 11 | buildvm_peobj.o: buildvm_peobj.c buildvm.h lj_def.h lua.h luaconf.h \ |
| 12 | lj_arch.h lj_bc.h | 12 | lj_arch.h lj_bc.h |
| 13 | lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ | 13 | lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ |
| 14 | lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_alloc.h | 14 | lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_lib.h lj_alloc.h |
| 15 | lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ | 15 | lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ |
| 16 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \ | 16 | lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \ |
| 17 | lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_ctype.h lj_lib.h lj_libdef.h | 17 | lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_ctype.h lj_lib.h lj_libdef.h |
| @@ -87,8 +87,8 @@ lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | |||
| 87 | lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \ | 87 | lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \ |
| 88 | lj_traceerr.h lj_vm.h lj_folddef.h | 88 | lj_traceerr.h lj_vm.h lj_folddef.h |
| 89 | lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 89 | lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
| 90 | lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h \ | 90 | lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ |
| 91 | lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h | 91 | lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h |
| 92 | lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ | 92 | lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ |
| 93 | lj_tab.h lj_ir.h lj_jit.h lj_iropt.h | 93 | lj_tab.h lj_ir.h lj_jit.h lj_iropt.h |
| 94 | lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ | 94 | lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ |
diff --git a/src/lib_jit.c b/src/lib_jit.c index aa421613..f3425d98 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c | |||
| @@ -332,18 +332,25 @@ LJLIB_CF(jit_util_tracesnap) | |||
| 332 | if (T && sn < T->nsnap) { | 332 | if (T && sn < T->nsnap) { |
| 333 | SnapShot *snap = &T->snap[sn]; | 333 | SnapShot *snap = &T->snap[sn]; |
| 334 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 334 | SnapEntry *map = &T->snapmap[snap->mapofs]; |
| 335 | BCReg s, nslots = snap->nslots; | 335 | MSize n, nent = snap->nent; |
| 336 | BCReg nslots = snap->nslots; | ||
| 336 | GCtab *t; | 337 | GCtab *t; |
| 337 | lua_createtable(L, nslots ? (int)nslots : 1, 0); | 338 | lua_createtable(L, nslots ? (int)nslots : 1, 0); |
| 338 | t = tabV(L->top-1); | 339 | t = tabV(L->top-1); |
| 339 | setintV(lj_tab_setint(L, t, 0), (int32_t)snap->ref - REF_BIAS); | 340 | setintV(lj_tab_setint(L, t, 0), (int32_t)snap->ref - REF_BIAS); |
| 340 | for (s = 0; s < nslots; s++) { | 341 | /* NYI: get rid of this and expose the compressed slot map. */ |
| 341 | TValue *o = lj_tab_setint(L, t, (int32_t)(s+1)); | 342 | { |
| 342 | IRRef ref = snap_ref(map[s]); | 343 | BCReg s; |
| 343 | if (ref) | 344 | for (s = 0; s < nslots; s++) { |
| 344 | setintV(o, (int32_t)ref - REF_BIAS); | 345 | TValue *o = lj_tab_setint(L, t, (int32_t)(s+1)); |
| 345 | else | ||
| 346 | setboolV(o, 0); | 346 | setboolV(o, 0); |
| 347 | } | ||
| 348 | } | ||
| 349 | for (n = 0; n < nent; n++) { | ||
| 350 | BCReg s = snap_slot(map[n]); | ||
| 351 | IRRef ref = snap_ref(map[n]); | ||
| 352 | TValue *o = lj_tab_setint(L, t, (int32_t)(s+1)); | ||
| 353 | setintV(o, (int32_t)ref - REF_BIAS); | ||
| 347 | } | 354 | } |
| 348 | return 1; | 355 | return 1; |
| 349 | } | 356 | } |
diff --git a/src/lj_asm.c b/src/lj_asm.c index 9f5ce030..b3656e00 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c | |||
| @@ -926,9 +926,9 @@ static void asm_snap_alloc(ASMState *as) | |||
| 926 | { | 926 | { |
| 927 | SnapShot *snap = &as->T->snap[as->snapno]; | 927 | SnapShot *snap = &as->T->snap[as->snapno]; |
| 928 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | 928 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; |
| 929 | BCReg s, nslots = snap->nslots; | 929 | MSize n, nent = snap->nent; |
| 930 | for (s = 0; s < nslots; s++) { | 930 | for (n = 0; n < nent; n++) { |
| 931 | IRRef ref = snap_ref(map[s]); | 931 | IRRef ref = snap_ref(map[n]); |
| 932 | if (!irref_isk(ref)) { | 932 | if (!irref_isk(ref)) { |
| 933 | IRIns *ir = IR(ref); | 933 | IRIns *ir = IR(ref); |
| 934 | if (!ra_used(ir) && ir->o != IR_FRAME) { | 934 | if (!ra_used(ir) && ir->o != IR_FRAME) { |
| @@ -960,9 +960,9 @@ static int asm_snap_checkrename(ASMState *as, IRRef ren) | |||
| 960 | { | 960 | { |
| 961 | SnapShot *snap = &as->T->snap[as->snapno]; | 961 | SnapShot *snap = &as->T->snap[as->snapno]; |
| 962 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | 962 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; |
| 963 | BCReg s, nslots = snap->nslots; | 963 | MSize n, nent = snap->nent; |
| 964 | for (s = 0; s < nslots; s++) { | 964 | for (n = 0; n < nent; n++) { |
| 965 | IRRef ref = snap_ref(map[s]); | 965 | IRRef ref = snap_ref(map[n]); |
| 966 | if (ref == ren) { | 966 | if (ref == ren) { |
| 967 | IRIns *ir = IR(ref); | 967 | IRIns *ir = IR(ref); |
| 968 | ra_spill(as, ir); /* Register renamed, so force a spill slot. */ | 968 | ra_spill(as, ir); /* Register renamed, so force a spill slot. */ |
| @@ -2465,18 +2465,17 @@ static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base) | |||
| 2465 | */ | 2465 | */ |
| 2466 | RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base); | 2466 | RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base); |
| 2467 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | 2467 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; |
| 2468 | BCReg s, nslots = snap->nslots; | 2468 | MSize n, nent = snap->nent; |
| 2469 | for (s = 0; s < nslots; s++) { | 2469 | for (n = 0; n < nent; n++) { |
| 2470 | IRRef ref = snap_ref(map[s]); | 2470 | IRRef ref = snap_ref(map[n]); |
| 2471 | if (!irref_isk(ref)) { | 2471 | if (!irref_isk(ref)) { |
| 2472 | int32_t ofs = 8*(int32_t)(snap_slot(map[n])-1); | ||
| 2472 | IRIns *ir = IR(ref); | 2473 | IRIns *ir = IR(ref); |
| 2473 | if (ir->o == IR_FRAME) { | 2474 | if (ir->o == IR_FRAME) { |
| 2474 | /* NYI: sync the frame, bump base, set topslot, clear new slots. */ | 2475 | /* NYI: sync the frame, bump base, set topslot, clear new slots. */ |
| 2475 | lj_trace_err(as->J, LJ_TRERR_NYIGCF); | 2476 | lj_trace_err(as->J, LJ_TRERR_NYIGCF); |
| 2476 | } else if (irt_isgcv(ir->t) && | 2477 | } else if (irt_isgcv(ir->t)) { |
| 2477 | !(ir->o == IR_SLOAD && ir->op1 < nslots && map[ir->op1] == 0)) { | ||
| 2478 | Reg src = ra_alloc1(as, ref, allow); | 2478 | Reg src = ra_alloc1(as, ref, allow); |
| 2479 | int32_t ofs = 8*(int32_t)(s-1); | ||
| 2480 | emit_movtomro(as, src, base, ofs); | 2479 | emit_movtomro(as, src, base, ofs); |
| 2481 | emit_movmroi(as, base, ofs+4, irt_toitype(ir->t)); | 2480 | emit_movmroi(as, base, ofs+4, irt_toitype(ir->t)); |
| 2482 | checkmclim(as); | 2481 | checkmclim(as); |
| @@ -2504,7 +2503,7 @@ static void asm_gc_check(ASMState *as, SnapShot *snap) | |||
| 2504 | emit_loadi(as, tmp, (int32_t)as->gcsteps); | 2503 | emit_loadi(as, tmp, (int32_t)as->gcsteps); |
| 2505 | /* We don't know spadj yet, so get the C frame from L->cframe. */ | 2504 | /* We don't know spadj yet, so get the C frame from L->cframe. */ |
| 2506 | emit_movmroi(as, tmp, CFRAME_OFS_PC, | 2505 | emit_movmroi(as, tmp, CFRAME_OFS_PC, |
| 2507 | (int32_t)as->T->snapmap[snap->mapofs+snap->nslots]); | 2506 | (int32_t)as->T->snapmap[snap->mapofs+snap->nent]); |
| 2508 | emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK); | 2507 | emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK); |
| 2509 | lstate = IR(ASMREF_L)->r; | 2508 | lstate = IR(ASMREF_L)->r; |
| 2510 | emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe)); | 2509 | emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe)); |
| @@ -2965,19 +2964,19 @@ static void asm_head_side(ASMState *as) | |||
| 2965 | static void asm_tail_sync(ASMState *as) | 2964 | static void asm_tail_sync(ASMState *as) |
| 2966 | { | 2965 | { |
| 2967 | SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */ | 2966 | SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */ |
| 2968 | BCReg s, nslots = snap->nslots; | 2967 | MSize n, nent = snap->nent; |
| 2969 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; | 2968 | SnapEntry *map = &as->T->snapmap[snap->mapofs]; |
| 2970 | SnapEntry *flinks = map + nslots + snap->nframelinks; | 2969 | SnapEntry *flinks = map + nent + snap->nframelinks; |
| 2971 | BCReg newbase = 0; | 2970 | BCReg newbase = 0; |
| 2972 | BCReg secondbase = ~(BCReg)0; | 2971 | BCReg nslots, topslot = 0; |
| 2973 | BCReg topslot = 0; | ||
| 2974 | 2972 | ||
| 2975 | checkmclim(as); | 2973 | checkmclim(as); |
| 2976 | ra_allocref(as, REF_BASE, RID2RSET(RID_BASE)); | 2974 | ra_allocref(as, REF_BASE, RID2RSET(RID_BASE)); |
| 2977 | 2975 | ||
| 2978 | /* Must check all frames to find topslot (outer can be larger than inner). */ | 2976 | /* Must check all frames to find topslot (outer can be larger than inner). */ |
| 2979 | for (s = 0; s < nslots; s++) { | 2977 | for (n = 0; n < nent; n++) { |
| 2980 | IRRef ref = snap_ref(map[s]); | 2978 | IRRef ref = snap_ref(map[n]); |
| 2979 | BCReg s = snap_slot(map[n]); | ||
| 2981 | if (!irref_isk(ref)) { | 2980 | if (!irref_isk(ref)) { |
| 2982 | IRIns *ir = IR(ref); | 2981 | IRIns *ir = IR(ref); |
| 2983 | if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { | 2982 | if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { |
| @@ -2985,10 +2984,7 @@ static void asm_tail_sync(ASMState *as) | |||
| 2985 | if (isluafunc(fn)) { | 2984 | if (isluafunc(fn)) { |
| 2986 | BCReg fs = s + funcproto(fn)->framesize; | 2985 | BCReg fs = s + funcproto(fn)->framesize; |
| 2987 | if (fs > topslot) topslot = fs; | 2986 | if (fs > topslot) topslot = fs; |
| 2988 | if (s != 0) { | 2987 | newbase = s; |
| 2989 | newbase = s; | ||
| 2990 | if (secondbase == ~(BCReg)0) secondbase = s; | ||
| 2991 | } | ||
| 2992 | } | 2988 | } |
| 2993 | } | 2989 | } |
| 2994 | } | 2990 | } |
| @@ -2998,7 +2994,7 @@ static void asm_tail_sync(ASMState *as) | |||
| 2998 | if (as->T->link == TRACE_INTERP) { | 2994 | if (as->T->link == TRACE_INTERP) { |
| 2999 | /* Setup fixed registers for exit to interpreter. */ | 2995 | /* Setup fixed registers for exit to interpreter. */ |
| 3000 | emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch); | 2996 | emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch); |
| 3001 | emit_loadi(as, RID_PC, (int32_t)map[nslots]); | 2997 | emit_loadi(as, RID_PC, (int32_t)map[nent]); |
| 3002 | } else if (newbase) { | 2998 | } else if (newbase) { |
| 3003 | /* Save modified BASE for linking to trace with higher start frame. */ | 2999 | /* Save modified BASE for linking to trace with higher start frame. */ |
| 3004 | emit_setgl(as, RID_BASE, jit_base); | 3000 | emit_setgl(as, RID_BASE, jit_base); |
| @@ -3007,51 +3003,50 @@ static void asm_tail_sync(ASMState *as) | |||
| 3007 | emit_addptr(as, RID_BASE, 8*(int32_t)newbase); | 3003 | emit_addptr(as, RID_BASE, 8*(int32_t)newbase); |
| 3008 | 3004 | ||
| 3009 | /* Clear stack slots of newly added frames. */ | 3005 | /* Clear stack slots of newly added frames. */ |
| 3006 | nslots = snap->nslots; | ||
| 3010 | if (nslots <= topslot) { | 3007 | if (nslots <= topslot) { |
| 3011 | if (nslots < topslot) { | 3008 | if (nslots < topslot) { |
| 3009 | BCReg s; | ||
| 3012 | for (s = nslots; s <= topslot; s++) { | 3010 | for (s = nslots; s <= topslot; s++) { |
| 3013 | emit_movtomro(as, RID_EAX, RID_BASE, 8*(int32_t)s-4); | 3011 | emit_movtomro(as, RID_EAX, RID_BASE, 8*((int32_t)s-1)+4); |
| 3014 | checkmclim(as); | 3012 | checkmclim(as); |
| 3015 | } | 3013 | } |
| 3016 | emit_loadi(as, RID_EAX, LJ_TNIL); | 3014 | emit_loadi(as, RID_EAX, LJ_TNIL); |
| 3017 | } else { | 3015 | } else { |
| 3018 | emit_movmroi(as, RID_BASE, 8*(int32_t)nslots-4, LJ_TNIL); | 3016 | emit_movmroi(as, RID_BASE, 8*((int32_t)nslots-1)+4, LJ_TNIL); |
| 3019 | } | 3017 | } |
| 3020 | } | 3018 | } |
| 3021 | 3019 | ||
| 3022 | /* Store the value of all modified slots to the Lua stack. */ | 3020 | /* Store the value of all modified slots to the Lua stack. */ |
| 3023 | for (s = 0; s < nslots; s++) { | 3021 | for (n = 0; n < nent; n++) { |
| 3022 | BCReg s = snap_slot(map[n]); | ||
| 3024 | int32_t ofs = 8*((int32_t)s-1); | 3023 | int32_t ofs = 8*((int32_t)s-1); |
| 3025 | IRRef ref = snap_ref(map[s]); | 3024 | IRRef ref = snap_ref(map[n]); |
| 3026 | if (ref) { | 3025 | IRIns *ir = IR(ref); |
| 3027 | IRIns *ir = IR(ref); | 3026 | /* No need to restore readonly slots and unmodified non-parent slots. */ |
| 3028 | /* No need to restore readonly slots and unmodified non-parent slots. */ | 3027 | if (ir->o == IR_SLOAD && ir->op1 == s && |
| 3029 | if (ir->o == IR_SLOAD && ir->op1 == s && | 3028 | (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) |
| 3030 | (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) | 3029 | continue; |
| 3031 | continue; | 3030 | if (irt_isnum(ir->t)) { |
| 3032 | if (irt_isnum(ir->t)) { | 3031 | Reg src = ra_alloc1(as, ref, RSET_FPR); |
| 3033 | Reg src = ra_alloc1(as, ref, RSET_FPR); | 3032 | emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); |
| 3034 | emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); | 3033 | } else if (ir->o == IR_FRAME) { |
| 3035 | } else if (ir->o == IR_FRAME) { | 3034 | emit_movmroi(as, RID_BASE, ofs, ptr2addr(ir_kgc(IR(ir->op2)))); |
| 3036 | emit_movmroi(as, RID_BASE, ofs, ptr2addr(ir_kgc(IR(ir->op2)))); | 3035 | if (s != 0) /* Do not overwrite link to previous frame. */ |
| 3037 | if (s != 0) /* Do not overwrite link to previous frame. */ | 3036 | emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks)); |
| 3038 | emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks)); | ||
| 3039 | } else { | ||
| 3040 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t)); | ||
| 3041 | if (!irref_isk(ref)) { | ||
| 3042 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); | ||
| 3043 | emit_movtomro(as, src, RID_BASE, ofs); | ||
| 3044 | } else if (!irt_ispri(ir->t)) { | ||
| 3045 | emit_movmroi(as, RID_BASE, ofs, ir->i); | ||
| 3046 | } | ||
| 3047 | emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); | ||
| 3048 | } | ||
| 3049 | } else { | 3037 | } else { |
| 3050 | lua_assert(!(s > secondbase)); | 3038 | lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t)); |
| 3039 | if (!irref_isk(ref)) { | ||
| 3040 | Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); | ||
| 3041 | emit_movtomro(as, src, RID_BASE, ofs); | ||
| 3042 | } else if (!irt_ispri(ir->t)) { | ||
| 3043 | emit_movmroi(as, RID_BASE, ofs, ir->i); | ||
| 3044 | } | ||
| 3045 | emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); | ||
| 3051 | } | 3046 | } |
| 3052 | checkmclim(as); | 3047 | checkmclim(as); |
| 3053 | } | 3048 | } |
| 3054 | lua_assert(map + nslots == flinks-1); | 3049 | lua_assert(map + nent == flinks-1); |
| 3055 | } | 3050 | } |
| 3056 | 3051 | ||
| 3057 | /* Fixup the tail code. */ | 3052 | /* Fixup the tail code. */ |
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c index 4fce5eb9..345afb51 100644 --- a/src/lj_gdbjit.c +++ b/src/lj_gdbjit.c | |||
| @@ -698,8 +698,8 @@ void lj_gdbjit_addtrace(jit_State *J, Trace *T, TraceNo traceno) | |||
| 698 | lua_State *L = J->L; | 698 | lua_State *L = J->L; |
| 699 | GCproto *pt = &gcref(T->startpt)->pt; | 699 | GCproto *pt = &gcref(T->startpt)->pt; |
| 700 | TraceNo parent = T->ir[REF_BASE].op1; | 700 | TraceNo parent = T->ir[REF_BASE].op1; |
| 701 | uintptr_t pcofs = (uintptr_t)(T->snap[0].mapofs+T->snap[0].nslots); | 701 | uintptr_t pcofs = (uintptr_t)(T->snap[0].mapofs+T->snap[0].nent); |
| 702 | const BCIns *startpc = (const BCIns *)(uintptr_t)T->snapmap[pcofs]; | 702 | const BCIns *startpc = snap_pc(T->snapmap[pcofs]); |
| 703 | ctx.T = T; | 703 | ctx.T = T; |
| 704 | ctx.mcaddr = (uintptr_t)T->mcode; | 704 | ctx.mcaddr = (uintptr_t)T->mcode; |
| 705 | ctx.szmcode = T->szmcode; | 705 | ctx.szmcode = T->szmcode; |
diff --git a/src/lj_jit.h b/src/lj_jit.h index 36e60113..1a1e407a 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h | |||
| @@ -112,17 +112,27 @@ typedef uint8_t MCode; | |||
| 112 | typedef struct SnapShot { | 112 | typedef struct SnapShot { |
| 113 | uint16_t mapofs; /* Offset into snapshot map. */ | 113 | uint16_t mapofs; /* Offset into snapshot map. */ |
| 114 | IRRef1 ref; /* First IR ref for this snapshot. */ | 114 | IRRef1 ref; /* First IR ref for this snapshot. */ |
| 115 | uint8_t nslots; /* Number of stack slots. */ | 115 | uint8_t nslots; /* Number of valid slots. */ |
| 116 | uint8_t nent; /* Number of compressed entries. */ | ||
| 116 | uint8_t nframelinks; /* Number of frame links. */ | 117 | uint8_t nframelinks; /* Number of frame links. */ |
| 117 | uint8_t count; /* Count of taken exits for this snapshot. */ | 118 | uint8_t count; /* Count of taken exits for this snapshot. */ |
| 118 | uint8_t unused1; | ||
| 119 | } SnapShot; | 119 | } SnapShot; |
| 120 | 120 | ||
| 121 | #define SNAPCOUNT_DONE 255 /* Already compiled and linked a side trace. */ | 121 | #define SNAPCOUNT_DONE 255 /* Already compiled and linked a side trace. */ |
| 122 | 122 | ||
| 123 | /* Snapshot entry. */ | 123 | /* Compressed snapshot entry. */ |
| 124 | typedef uint32_t SnapEntry; | 124 | typedef uint32_t SnapEntry; |
| 125 | |||
| 126 | #define SNAP_FRAME 0x010000 /* Slot has frame link. */ | ||
| 127 | |||
| 128 | #define SNAP(slot, flags, ref) ((SnapEntry)((slot) << 24) + (flags) + (ref)) | ||
| 129 | #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) | ||
| 130 | #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) | ||
| 125 | #define snap_ref(sn) ((sn) & 0xffff) | 131 | #define snap_ref(sn) ((sn) & 0xffff) |
| 132 | #define snap_slot(sn) ((BCReg)((sn) >> 24)) | ||
| 133 | #define snap_isframe(sn) ((sn) & SNAP_FRAME) | ||
| 134 | #define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn)) | ||
| 135 | #define snap_setref(sn, ref) (((sn) & 0xffff0000) | (ref)) | ||
| 126 | 136 | ||
| 127 | /* Snapshot and exit numbers. */ | 137 | /* Snapshot and exit numbers. */ |
| 128 | typedef uint32_t SnapNo; | 138 | typedef uint32_t SnapNo; |
diff --git a/src/lj_opt_dce.c b/src/lj_opt_dce.c index 636d5183..90e81526 100644 --- a/src/lj_opt_dce.c +++ b/src/lj_opt_dce.c | |||
| @@ -24,9 +24,9 @@ static void dce_marksnap(jit_State *J) | |||
| 24 | for (i = 0; i < nsnap; i++) { | 24 | for (i = 0; i < nsnap; i++) { |
| 25 | SnapShot *snap = &J->cur.snap[i]; | 25 | SnapShot *snap = &J->cur.snap[i]; |
| 26 | SnapEntry *map = &J->cur.snapmap[snap->mapofs]; | 26 | SnapEntry *map = &J->cur.snapmap[snap->mapofs]; |
| 27 | BCReg s, nslots = snap->nslots; | 27 | MSize n, nent = snap->nent; |
| 28 | for (s = 0; s < nslots; s++) { | 28 | for (n = 0; n < nent; n++) { |
| 29 | IRRef ref = snap_ref(map[s]); | 29 | IRRef ref = snap_ref(map[n]); |
| 30 | if (!irref_isk(ref)) | 30 | if (!irref_isk(ref)) |
| 31 | irt_setmark(IR(ref)->t); | 31 | irt_setmark(IR(ref)->t); |
| 32 | } | 32 | } |
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index f2950fe9..e5ad5b43 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c | |||
| @@ -10,7 +10,6 @@ | |||
| 10 | 10 | ||
| 11 | #if LJ_HASJIT | 11 | #if LJ_HASJIT |
| 12 | 12 | ||
| 13 | #include "lj_gc.h" | ||
| 14 | #include "lj_err.h" | 13 | #include "lj_err.h" |
| 15 | #include "lj_str.h" | 14 | #include "lj_str.h" |
| 16 | #include "lj_ir.h" | 15 | #include "lj_ir.h" |
| @@ -163,21 +162,69 @@ static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi) | |||
| 163 | 162 | ||
| 164 | /* -- Loop unrolling using copy-substitution ------------------------------ */ | 163 | /* -- Loop unrolling using copy-substitution ------------------------------ */ |
| 165 | 164 | ||
| 165 | /* Copy-substitute snapshot. */ | ||
| 166 | static void loop_subst_snap(jit_State *J, SnapShot *osnap, | ||
| 167 | SnapEntry *loopmap, IRRef1 *subst) | ||
| 168 | { | ||
| 169 | SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs]; | ||
| 170 | MSize nmapofs, nframelinks; | ||
| 171 | MSize on, ln, nn, onent = osnap->nent; | ||
| 172 | BCReg nslots = osnap->nslots; | ||
| 173 | SnapShot *snap = &J->cur.snap[J->cur.nsnap]; | ||
| 174 | if (irt_isguard(J->guardemit)) { /* Guard inbetween? */ | ||
| 175 | nmapofs = J->cur.nsnapmap; | ||
| 176 | J->cur.nsnap++; /* Add new snapshot. */ | ||
| 177 | } else { /* Otherwise overwrite previous snapshot. */ | ||
| 178 | snap--; | ||
| 179 | nmapofs = snap->mapofs; | ||
| 180 | } | ||
| 181 | J->guardemit.irt = 0; | ||
| 182 | nframelinks = osnap->nframelinks; | ||
| 183 | /* Setup new snapshot. */ | ||
| 184 | snap->mapofs = (uint16_t)nmapofs; | ||
| 185 | snap->ref = (IRRef1)J->cur.nins; | ||
| 186 | snap->nframelinks = (uint8_t)nframelinks; | ||
| 187 | snap->nslots = nslots; | ||
| 188 | snap->count = 0; | ||
| 189 | nmap = &J->cur.snapmap[nmapofs]; | ||
| 190 | /* Substitute snapshot slots. */ | ||
| 191 | on = ln = nn = 0; | ||
| 192 | while (on < onent) { | ||
| 193 | SnapEntry osn = omap[on], lsn = loopmap[ln]; | ||
| 194 | if (snap_slot(lsn) < snap_slot(osn)) { /* Copy slot from loop map. */ | ||
| 195 | nmap[nn++] = lsn; | ||
| 196 | ln++; | ||
| 197 | } else { /* Copy substituted slot from snapshot map. */ | ||
| 198 | if (snap_slot(lsn) == snap_slot(osn)) ln++; /* Shadowed loop slot. */ | ||
| 199 | if (!irref_isk(snap_ref(osn))) | ||
| 200 | osn = snap_setref(osn, subst[snap_ref(osn)]); | ||
| 201 | nmap[nn++] = osn; | ||
| 202 | on++; | ||
| 203 | } | ||
| 204 | } | ||
| 205 | while (snap_slot(loopmap[ln]) < nslots) /* Copy remaining loop slots. */ | ||
| 206 | nmap[nn++] = loopmap[ln++]; | ||
| 207 | snap->nent = (uint8_t)nn; | ||
| 208 | J->cur.nsnapmap = (uint16_t)(nmapofs + nn + nframelinks); | ||
| 209 | omap += onent; | ||
| 210 | nmap += nn; | ||
| 211 | for (nn = 0; nn < nframelinks; nn++) /* Copy frame links. */ | ||
| 212 | nmap[nn] = omap[nn]; | ||
| 213 | } | ||
| 214 | |||
| 166 | /* Unroll loop. */ | 215 | /* Unroll loop. */ |
| 167 | static void loop_unroll(jit_State *J) | 216 | static void loop_unroll(jit_State *J) |
| 168 | { | 217 | { |
| 169 | IRRef1 phi[LJ_MAX_PHI]; | 218 | IRRef1 phi[LJ_MAX_PHI]; |
| 170 | uint32_t nphi = 0; | 219 | uint32_t nphi = 0; |
| 171 | IRRef1 *subst; | 220 | IRRef1 *subst; |
| 172 | SnapShot *osnap, *snap; | 221 | SnapShot *osnap; |
| 173 | SnapEntry *loopmap; | 222 | SnapEntry *loopmap, *psentinel; |
| 174 | BCReg loopslots; | 223 | IRRef ins, invar; |
| 175 | MSize nsnap, nsnapmap; | ||
| 176 | IRRef ins, invar, osnapref; | ||
| 177 | 224 | ||
| 178 | /* Use temp buffer for substitution table. | 225 | /* Use temp buffer for substitution table. |
| 179 | ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. | 226 | ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. |
| 180 | ** Note: don't call into the VM or run the GC or the buffer may be gone. | 227 | ** Caveat: don't call into the VM or run the GC or the buffer may be gone. |
| 181 | */ | 228 | */ |
| 182 | invar = J->cur.nins; | 229 | invar = J->cur.nins; |
| 183 | subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, | 230 | subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, |
| @@ -187,80 +234,37 @@ static void loop_unroll(jit_State *J) | |||
| 187 | /* LOOP separates the pre-roll from the loop body. */ | 234 | /* LOOP separates the pre-roll from the loop body. */ |
| 188 | emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0); | 235 | emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0); |
| 189 | 236 | ||
| 190 | /* Ensure size for copy-substituted snapshots (minus #0 and loop snapshot). */ | 237 | /* Grow snapshot buffer and map for copy-substituted snapshots. |
| 191 | nsnap = J->cur.nsnap; | 238 | ** Need up to twice the number of snapshots minus #0 and loop snapshot. |
| 192 | if (LJ_UNLIKELY(2*nsnap-2 > J->sizesnap)) { | 239 | ** Need up to twice the number of entries plus fallback substitutions |
| 193 | MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; | 240 | ** from the loop snapshot entries for each new snapshot. |
| 194 | if (2*nsnap-2 > maxsnap) | 241 | ** Caveat: both calls may reallocate J->cur.snap and J->cur.snapmap! |
| 195 | lj_trace_err(J, LJ_TRERR_SNAPOV); | 242 | */ |
| 196 | lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); | 243 | { |
| 197 | J->cur.snap = J->snapbuf; | 244 | MSize nsnap = J->cur.nsnap; |
| 198 | } | 245 | SnapShot *loopsnap; |
| 199 | nsnapmap = J->cur.nsnapmap; /* Use temp. copy to avoid undo. */ | 246 | lj_snap_grow_buf(J, 2*nsnap-2); |
| 200 | if (LJ_UNLIKELY(nsnapmap*2 > J->sizesnapmap)) { | 247 | lj_snap_grow_map(J, J->cur.nsnapmap*2+(nsnap-2)*J->cur.snap[nsnap-1].nent); |
| 201 | J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf, | ||
| 202 | J->sizesnapmap*sizeof(SnapEntry), | ||
| 203 | 2*J->sizesnapmap*sizeof(SnapEntry)); | ||
| 204 | J->cur.snapmap = J->snapmapbuf; | ||
| 205 | J->sizesnapmap *= 2; | ||
| 206 | } | ||
| 207 | 248 | ||
| 208 | /* The loop snapshot is used for fallback substitutions. */ | 249 | /* The loop snapshot is used for fallback substitutions. */ |
| 209 | snap = &J->cur.snap[nsnap-1]; | 250 | loopsnap = &J->cur.snap[nsnap-1]; |
| 210 | loopmap = &J->cur.snapmap[snap->mapofs]; | 251 | loopmap = &J->cur.snapmap[loopsnap->mapofs]; |
| 211 | loopslots = snap->nslots; | 252 | /* The PC of snapshot #0 and the loop snapshot must match. */ |
| 212 | /* The PC of snapshot #0 and the loop snapshot must match. */ | 253 | psentinel = &loopmap[loopsnap->nent]; |
| 213 | lua_assert(loopmap[loopslots] == J->cur.snapmap[J->cur.snap[0].nslots]); | 254 | lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]); |
| 255 | *psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */ | ||
| 256 | } | ||
| 214 | 257 | ||
| 215 | /* Start substitution with snapshot #1 (#0 is empty for root traces). */ | 258 | /* Start substitution with snapshot #1 (#0 is empty for root traces). */ |
| 216 | osnap = &J->cur.snap[1]; | 259 | osnap = &J->cur.snap[1]; |
| 217 | osnapref = osnap->ref; | ||
| 218 | 260 | ||
| 219 | /* Copy and substitute all recorded instructions and snapshots. */ | 261 | /* Copy and substitute all recorded instructions and snapshots. */ |
| 220 | for (ins = REF_FIRST; ins < invar; ins++) { | 262 | for (ins = REF_FIRST; ins < invar; ins++) { |
| 221 | IRIns *ir; | 263 | IRIns *ir; |
| 222 | IRRef op1, op2; | 264 | IRRef op1, op2; |
| 223 | 265 | ||
| 224 | /* Copy-substitute snapshot. */ | 266 | if (ins >= osnap->ref) /* Instruction belongs to next snapshot? */ |
| 225 | if (ins >= osnapref) { | 267 | loop_subst_snap(J, osnap++, loopmap, subst); /* Copy-substitute it. */ |
| 226 | SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs]; | ||
| 227 | BCReg s, nslots; | ||
| 228 | uint32_t nmapofs, nframelinks; | ||
| 229 | if (irt_isguard(J->guardemit)) { /* Guard inbetween? */ | ||
| 230 | nmapofs = nsnapmap; | ||
| 231 | snap++; /* Add new snapshot. */ | ||
| 232 | } else { | ||
| 233 | nmapofs = snap->mapofs; /* Overwrite previous snapshot. */ | ||
| 234 | } | ||
| 235 | J->guardemit.irt = 0; | ||
| 236 | nslots = osnap->nslots; | ||
| 237 | nframelinks = osnap->nframelinks; | ||
| 238 | snap->mapofs = (uint16_t)nmapofs; | ||
| 239 | snap->ref = (IRRef1)J->cur.nins; | ||
| 240 | snap->nslots = (uint8_t)nslots; | ||
| 241 | snap->nframelinks = (uint8_t)nframelinks; | ||
| 242 | snap->count = 0; | ||
| 243 | osnap++; | ||
| 244 | osnapref = osnap->ref; | ||
| 245 | nsnapmap = nmapofs + nslots + nframelinks; | ||
| 246 | nmap = &J->cur.snapmap[nmapofs]; | ||
| 247 | /* Substitute snapshot slots. */ | ||
| 248 | for (s = 0; s < nslots; s++) { | ||
| 249 | IRRef ref = snap_ref(omap[s]); | ||
| 250 | if (ref) { | ||
| 251 | if (!irref_isk(ref)) | ||
| 252 | ref = subst[ref]; | ||
| 253 | } else if (s < loopslots) { | ||
| 254 | ref = loopmap[s]; | ||
| 255 | } | ||
| 256 | nmap[s] = ref; | ||
| 257 | } | ||
| 258 | /* Copy frame links. */ | ||
| 259 | nmap += nslots; | ||
| 260 | omap += nslots; | ||
| 261 | for (s = 0; s < nframelinks; s++) | ||
| 262 | nmap[s] = omap[s]; | ||
| 263 | } | ||
| 264 | 268 | ||
| 265 | /* Substitute instruction operands. */ | 269 | /* Substitute instruction operands. */ |
| 266 | ir = IR(ins); | 270 | ir = IR(ins); |
| @@ -295,22 +299,24 @@ static void loop_unroll(jit_State *J) | |||
| 295 | } | 299 | } |
| 296 | } | 300 | } |
| 297 | } | 301 | } |
| 298 | if (irt_isguard(J->guardemit)) { /* Guard inbetween? */ | 302 | if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */ |
| 299 | J->cur.nsnapmap = (uint16_t)nsnapmap; | 303 | J->cur.nsnapmap = (uint16_t)J->cur.snap[--J->cur.nsnap].mapofs; |
| 300 | snap++; | ||
| 301 | } else { | ||
| 302 | J->cur.nsnapmap = (uint16_t)snap->mapofs; /* Last snapshot is redundant. */ | ||
| 303 | } | ||
| 304 | J->cur.nsnap = (uint16_t)(snap - J->cur.snap); | ||
| 305 | lua_assert(J->cur.nsnapmap <= J->sizesnapmap); | 304 | lua_assert(J->cur.nsnapmap <= J->sizesnapmap); |
| 305 | *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */ | ||
| 306 | 306 | ||
| 307 | loop_emit_phi(J, subst, phi, nphi); | 307 | loop_emit_phi(J, subst, phi, nphi); |
| 308 | } | 308 | } |
| 309 | 309 | ||
| 310 | /* Undo any partial changes made by the loop optimization. */ | 310 | /* Undo any partial changes made by the loop optimization. */ |
| 311 | static void loop_undo(jit_State *J, IRRef ins) | 311 | static void loop_undo(jit_State *J, IRRef ins, MSize nsnap) |
| 312 | { | 312 | { |
| 313 | ptrdiff_t i; | 313 | ptrdiff_t i; |
| 314 | SnapShot *snap = &J->cur.snap[nsnap-1]; | ||
| 315 | SnapEntry *map = J->cur.snapmap; | ||
| 316 | map[snap->mapofs + snap->nent] = map[J->cur.snap[0].nent]; /* Restore PC. */ | ||
| 317 | J->cur.nsnapmap = (uint16_t)(snap->mapofs + snap->nent + snap->nframelinks); | ||
| 318 | J->cur.nsnap = nsnap; | ||
| 319 | J->guardemit.irt = 0; | ||
| 314 | lj_ir_rollback(J, ins); | 320 | lj_ir_rollback(J, ins); |
| 315 | for (i = 0; i < BPROP_SLOTS; i++) { /* Remove backprop. cache entries. */ | 321 | for (i = 0; i < BPROP_SLOTS; i++) { /* Remove backprop. cache entries. */ |
| 316 | BPropEntry *bp = &J->bpropcache[i]; | 322 | BPropEntry *bp = &J->bpropcache[i]; |
| @@ -336,6 +342,7 @@ static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud) | |||
| 336 | int lj_opt_loop(jit_State *J) | 342 | int lj_opt_loop(jit_State *J) |
| 337 | { | 343 | { |
| 338 | IRRef nins = J->cur.nins; | 344 | IRRef nins = J->cur.nins; |
| 345 | MSize nsnap = J->cur.nsnap; | ||
| 339 | int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); | 346 | int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); |
| 340 | if (LJ_UNLIKELY(errcode)) { | 347 | if (LJ_UNLIKELY(errcode)) { |
| 341 | lua_State *L = J->L; | 348 | lua_State *L = J->L; |
| @@ -348,8 +355,7 @@ int lj_opt_loop(jit_State *J) | |||
| 348 | if (--J->instunroll < 0) /* But do not unroll forever. */ | 355 | if (--J->instunroll < 0) /* But do not unroll forever. */ |
| 349 | break; | 356 | break; |
| 350 | L->top--; /* Remove error object. */ | 357 | L->top--; /* Remove error object. */ |
| 351 | J->guardemit.irt = 0; | 358 | loop_undo(J, nins, nsnap); |
| 352 | loop_undo(J, nins); | ||
| 353 | return 1; /* Loop optimization failed, continue recording. */ | 359 | return 1; /* Loop optimization failed, continue recording. */ |
| 354 | default: | 360 | default: |
| 355 | break; | 361 | break; |
diff --git a/src/lj_record.c b/src/lj_record.c index 6af25ccb..3f442088 100644 --- a/src/lj_record.c +++ b/src/lj_record.c | |||
| @@ -1696,7 +1696,7 @@ static void optstate_comp(jit_State *J, int cond) | |||
| 1696 | const BCIns *npc = J->pc + 2 + (cond ? bc_j(jmpins) : 0); | 1696 | const BCIns *npc = J->pc + 2 + (cond ? bc_j(jmpins) : 0); |
| 1697 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; | 1697 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; |
| 1698 | /* Avoid re-recording the comparison in side traces. */ | 1698 | /* Avoid re-recording the comparison in side traces. */ |
| 1699 | J->cur.snapmap[snap->mapofs + snap->nslots] = u32ptr(npc); | 1699 | J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); |
| 1700 | J->needsnap = 1; | 1700 | J->needsnap = 1; |
| 1701 | /* Shrink last snapshot if possible. */ | 1701 | /* Shrink last snapshot if possible. */ |
| 1702 | if (bc_a(jmpins) < J->maxslot) { | 1702 | if (bc_a(jmpins) < J->maxslot) { |
| @@ -2159,61 +2159,62 @@ static void rec_setup_side(jit_State *J, Trace *T) | |||
| 2159 | { | 2159 | { |
| 2160 | SnapShot *snap = &T->snap[J->exitno]; | 2160 | SnapShot *snap = &T->snap[J->exitno]; |
| 2161 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 2161 | SnapEntry *map = &T->snapmap[snap->mapofs]; |
| 2162 | BCReg s, nslots = snap->nslots; | 2162 | MSize n, nent = snap->nent; |
| 2163 | BloomFilter seen = 0; | 2163 | BloomFilter seen = 0; |
| 2164 | for (s = 0; s < nslots; s++) { | 2164 | /* Emit IR for slots inherited from parent snapshot. */ |
| 2165 | IRRef ref = snap_ref(map[s]); | 2165 | for (n = 0; n < nent; n++) { |
| 2166 | if (ref) { | 2166 | IRRef ref = snap_ref(map[n]); |
| 2167 | IRIns *ir = &T->ir[ref]; | 2167 | BCReg s = snap_slot(map[n]); |
| 2168 | TRef tr = 0; | 2168 | IRIns *ir = &T->ir[ref]; |
| 2169 | /* The bloom filter avoids O(nslots^2) overhead for de-duping slots. */ | 2169 | TRef tr; |
| 2170 | if (bloomtest(seen, ref)) { | 2170 | /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */ |
| 2171 | BCReg j; | 2171 | if (bloomtest(seen, ref)) { |
| 2172 | for (j = 0; j < s; j++) | 2172 | MSize j; |
| 2173 | if (snap_ref(map[j]) == ref) { | 2173 | for (j = 0; j < n; j++) |
| 2174 | if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { | 2174 | if (snap_ref(map[j]) == ref) { |
| 2175 | lua_assert(s != 0); | 2175 | tr = J->slot[snap_slot(map[j])]; |
| 2176 | J->baseslot = s+1; | 2176 | if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { |
| 2177 | J->framedepth++; | 2177 | lua_assert(s != 0); |
| 2178 | } | ||
| 2179 | tr = J->slot[j]; | ||
| 2180 | goto dupslot; | ||
| 2181 | } | ||
| 2182 | } | ||
| 2183 | bloomset(seen, ref); | ||
| 2184 | switch ((IROp)ir->o) { | ||
| 2185 | case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break; | ||
| 2186 | case IR_KINT: tr = lj_ir_kint(J, ir->i); break; | ||
| 2187 | case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break; | ||
| 2188 | case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break; | ||
| 2189 | case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */ | ||
| 2190 | if (irt_isfunc(ir->t)) { | ||
| 2191 | if (s != 0) { | ||
| 2192 | J->baseslot = s+1; | 2178 | J->baseslot = s+1; |
| 2193 | J->framedepth++; | 2179 | J->framedepth++; |
| 2194 | } | 2180 | } |
| 2195 | tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2])); | 2181 | goto dupslot; |
| 2196 | tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr); | ||
| 2197 | } else { | ||
| 2198 | tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void)); | ||
| 2199 | tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr); | ||
| 2200 | } | 2182 | } |
| 2201 | break; | 2183 | } |
| 2202 | case IR_SLOAD: /* Inherited SLOADs don't need a guard or type check. */ | 2184 | bloomset(seen, ref); |
| 2203 | tr = emitir_raw(ir->ot & ~IRT_GUARD, s, | 2185 | switch ((IROp)ir->o) { |
| 2204 | (ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT); | 2186 | /* Only have to deal with constants that can occur in stack slots. */ |
| 2205 | break; | 2187 | case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break; |
| 2206 | default: /* Parent refs are already typed and don't need a guard. */ | 2188 | case IR_KINT: tr = lj_ir_kint(J, ir->i); break; |
| 2207 | tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s, | 2189 | case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break; |
| 2208 | IRSLOAD_INHERIT|IRSLOAD_PARENT); | 2190 | case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break; |
| 2209 | break; | 2191 | case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */ |
| 2192 | if (irt_isfunc(ir->t)) { | ||
| 2193 | if (s != 0) { | ||
| 2194 | J->baseslot = s+1; | ||
| 2195 | J->framedepth++; | ||
| 2196 | } | ||
| 2197 | tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2])); | ||
| 2198 | tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr); | ||
| 2199 | } else { | ||
| 2200 | tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void)); | ||
| 2201 | tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr); | ||
| 2210 | } | 2202 | } |
| 2211 | dupslot: | 2203 | break; |
| 2212 | J->slot[s] = tr; | 2204 | case IR_SLOAD: /* Inherited SLOADs don't need a guard or type check. */ |
| 2205 | tr = emitir_raw(ir->ot & ~IRT_GUARD, s, | ||
| 2206 | (ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT); | ||
| 2207 | break; | ||
| 2208 | default: /* Parent refs are already typed and don't need a guard. */ | ||
| 2209 | tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s, | ||
| 2210 | IRSLOAD_INHERIT|IRSLOAD_PARENT); | ||
| 2211 | break; | ||
| 2213 | } | 2212 | } |
| 2213 | dupslot: | ||
| 2214 | J->slot[s] = tr; | ||
| 2214 | } | 2215 | } |
| 2215 | J->base = J->slot + J->baseslot; | 2216 | J->base = J->slot + J->baseslot; |
| 2216 | J->maxslot = nslots - J->baseslot; | 2217 | J->maxslot = snap->nslots - J->baseslot; |
| 2217 | lj_snap_add(J); | 2218 | lj_snap_add(J); |
| 2218 | } | 2219 | } |
| 2219 | 2220 | ||
| @@ -2259,7 +2260,7 @@ void lj_record_setup(jit_State *J) | |||
| 2259 | J->cur.root = (uint16_t)root; | 2260 | J->cur.root = (uint16_t)root; |
| 2260 | J->cur.startins = BCINS_AD(BC_JMP, 0, 0); | 2261 | J->cur.startins = BCINS_AD(BC_JMP, 0, 0); |
| 2261 | /* Check whether we could at least potentially form an extra loop. */ | 2262 | /* Check whether we could at least potentially form an extra loop. */ |
| 2262 | if (J->exitno == 0 && T->snap[0].nslots == 1 && T->snapmap[0] == 0) { | 2263 | if (J->exitno == 0 && T->snap[0].nent == 0) { |
| 2263 | /* We can narrow a FORL for some side traces, too. */ | 2264 | /* We can narrow a FORL for some side traces, too. */ |
| 2264 | if (J->pc > J->pt->bc && bc_op(J->pc[-1]) == BC_JFORI && | 2265 | if (J->pc > J->pt->bc && bc_op(J->pc[-1]) == BC_JFORI && |
| 2265 | bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) { | 2266 | bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) { |
diff --git a/src/lj_snap.c b/src/lj_snap.c index f262e1c9..d22c90a4 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c | |||
| @@ -23,28 +23,50 @@ | |||
| 23 | /* Some local macros to save typing. Undef'd at the end. */ | 23 | /* Some local macros to save typing. Undef'd at the end. */ |
| 24 | #define IR(ref) (&J->cur.ir[(ref)]) | 24 | #define IR(ref) (&J->cur.ir[(ref)]) |
| 25 | 25 | ||
| 26 | /* -- Snapshot buffer allocation ------------------------------------------ */ | ||
| 27 | |||
| 28 | /* Grow snapshot buffer. */ | ||
| 29 | void lj_snap_grow_buf_(jit_State *J, MSize need) | ||
| 30 | { | ||
| 31 | MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; | ||
| 32 | if (need > maxsnap) | ||
| 33 | lj_trace_err(J, LJ_TRERR_SNAPOV); | ||
| 34 | lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); | ||
| 35 | J->cur.snap = J->snapbuf; | ||
| 36 | } | ||
| 37 | |||
| 38 | /* Grow snapshot map buffer. */ | ||
| 39 | void lj_snap_grow_map_(jit_State *J, MSize need) | ||
| 40 | { | ||
| 41 | if (need < 2*J->sizesnapmap) | ||
| 42 | need = 2*J->sizesnapmap; | ||
| 43 | else if (need < 64) | ||
| 44 | need = 64; | ||
| 45 | J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf, | ||
| 46 | J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry)); | ||
| 47 | J->cur.snapmap = J->snapmapbuf; | ||
| 48 | J->sizesnapmap = need; | ||
| 49 | } | ||
| 50 | |||
| 26 | /* -- Snapshot generation ------------------------------------------------- */ | 51 | /* -- Snapshot generation ------------------------------------------------- */ |
| 27 | 52 | ||
| 28 | /* NYI: Snapshots are in need of a redesign. The current storage model for | 53 | /* NYI: IR_FRAME should be eliminated, too. */ |
| 29 | ** snapshot maps is too wasteful. They could be compressed (1D or 2D) and | ||
| 30 | ** made more flexible at the same time. Iterators should no longer need to | ||
| 31 | ** skip unmodified slots. IR_FRAME should be eliminated, too. | ||
| 32 | */ | ||
| 33 | 54 | ||
| 34 | /* Add all modified slots to the snapshot. */ | 55 | /* Add all modified slots to the snapshot. */ |
| 35 | static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) | 56 | static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) |
| 36 | { | 57 | { |
| 37 | BCReg s; | 58 | BCReg s; |
| 59 | MSize n = 0; | ||
| 38 | for (s = 0; s < nslots; s++) { | 60 | for (s = 0; s < nslots; s++) { |
| 39 | IRRef ref = tref_ref(J->slot[s]); | 61 | IRRef ref = tref_ref(J->slot[s]); |
| 40 | if (ref) { | 62 | if (ref) { |
| 41 | IRIns *ir = IR(ref); | 63 | IRIns *ir = IR(ref); |
| 42 | if (ir->o == IR_SLOAD && ir->op1 == s && !(ir->op2 & IRSLOAD_INHERIT)) | 64 | if (!(ir->o == IR_SLOAD && ir->op1 == s && |
| 43 | ref = 0; | 65 | !(ir->op2 & IRSLOAD_INHERIT))) |
| 66 | map[n++] = SNAP(s, ir->o == IR_FRAME ? SNAP_FRAME : 0, ref); | ||
| 44 | } | 67 | } |
| 45 | map[s] = (SnapEntry)ref; | ||
| 46 | } | 68 | } |
| 47 | return nslots; | 69 | return n; |
| 48 | } | 70 | } |
| 49 | 71 | ||
| 50 | /* Add frame links at the end of the snapshot. */ | 72 | /* Add frame links at the end of the snapshot. */ |
| @@ -53,17 +75,17 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map) | |||
| 53 | cTValue *frame = J->L->base - 1; | 75 | cTValue *frame = J->L->base - 1; |
| 54 | cTValue *lim = J->L->base - J->baseslot; | 76 | cTValue *lim = J->L->base - J->baseslot; |
| 55 | MSize f = 0; | 77 | MSize f = 0; |
| 56 | map[f++] = u32ptr(J->pc); | 78 | map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ |
| 57 | while (frame > lim) { | 79 | while (frame > lim) { /* Backwards traversal of all frames above base. */ |
| 58 | if (frame_islua(frame)) { | 80 | if (frame_islua(frame)) { |
| 59 | map[f++] = u32ptr(frame_pc(frame)); | 81 | map[f++] = SNAP_MKPC(frame_pc(frame)); |
| 60 | frame = frame_prevl(frame); | 82 | frame = frame_prevl(frame); |
| 61 | } else if (frame_ispcall(frame)) { | 83 | } else if (frame_ispcall(frame)) { |
| 62 | map[f++] = (uint32_t)frame_ftsz(frame); | 84 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); |
| 63 | frame = frame_prevd(frame); | 85 | frame = frame_prevd(frame); |
| 64 | } else if (frame_iscont(frame)) { | 86 | } else if (frame_iscont(frame)) { |
| 65 | map[f++] = (uint32_t)frame_ftsz(frame); | 87 | map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); |
| 66 | map[f++] = u32ptr(frame_contpc(frame)); | 88 | map[f++] = SNAP_MKPC(frame_contpc(frame)); |
| 67 | frame = frame_prevd(frame); | 89 | frame = frame_prevd(frame); |
| 68 | } else { | 90 | } else { |
| 69 | lua_assert(0); | 91 | lua_assert(0); |
| @@ -76,28 +98,19 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map) | |||
| 76 | static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) | 98 | static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) |
| 77 | { | 99 | { |
| 78 | BCReg nslots = J->baseslot + J->maxslot; | 100 | BCReg nslots = J->baseslot + J->maxslot; |
| 79 | MSize nsm, nframelinks; | 101 | MSize nent, nframelinks; |
| 80 | SnapEntry *p; | 102 | SnapEntry *p; |
| 81 | /* Conservative estimate. Continuation frames need 2 slots. */ | 103 | /* Conservative estimate. Continuation frames need 2 slots. */ |
| 82 | nsm = nsnapmap + nslots + (uint32_t)J->framedepth*2+1; | 104 | lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth*2+1); |
| 83 | if (LJ_UNLIKELY(nsm > J->sizesnapmap)) { /* Need to grow snapshot map? */ | ||
| 84 | if (nsm < 2*J->sizesnapmap) | ||
| 85 | nsm = 2*J->sizesnapmap; | ||
| 86 | else if (nsm < 64) | ||
| 87 | nsm = 64; | ||
| 88 | J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf, | ||
| 89 | J->sizesnapmap*sizeof(SnapEntry), nsm*sizeof(SnapEntry)); | ||
| 90 | J->cur.snapmap = J->snapmapbuf; | ||
| 91 | J->sizesnapmap = nsm; | ||
| 92 | } | ||
| 93 | p = &J->cur.snapmap[nsnapmap]; | 105 | p = &J->cur.snapmap[nsnapmap]; |
| 94 | nslots = snapshot_slots(J, p, nslots); | 106 | nent = snapshot_slots(J, p, nslots); |
| 95 | nframelinks = snapshot_framelinks(J, p + nslots); | 107 | nframelinks = snapshot_framelinks(J, p + nent); |
| 96 | J->cur.nsnapmap = (uint16_t)(nsnapmap + nslots + nframelinks); | 108 | J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + nframelinks); |
| 97 | snap->mapofs = (uint16_t)nsnapmap; | 109 | snap->mapofs = (uint16_t)nsnapmap; |
| 98 | snap->ref = (IRRef1)J->cur.nins; | 110 | snap->ref = (IRRef1)J->cur.nins; |
| 99 | snap->nslots = (uint8_t)nslots; | 111 | snap->nent = (uint8_t)nent; |
| 100 | snap->nframelinks = (uint8_t)nframelinks; | 112 | snap->nframelinks = (uint8_t)nframelinks; |
| 113 | snap->nslots = (uint8_t)nslots; | ||
| 101 | snap->count = 0; | 114 | snap->count = 0; |
| 102 | } | 115 | } |
| 103 | 116 | ||
| @@ -111,14 +124,7 @@ void lj_snap_add(jit_State *J) | |||
| 111 | (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { | 124 | (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { |
| 112 | nsnapmap = J->cur.snap[--nsnap].mapofs; | 125 | nsnapmap = J->cur.snap[--nsnap].mapofs; |
| 113 | } else { | 126 | } else { |
| 114 | /* Need to grow snapshot buffer? */ | 127 | lj_snap_grow_buf(J, nsnap+1); |
| 115 | if (LJ_UNLIKELY(nsnap >= J->sizesnap)) { | ||
| 116 | MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; | ||
| 117 | if (nsnap >= maxsnap) | ||
| 118 | lj_trace_err(J, LJ_TRERR_SNAPOV); | ||
| 119 | lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); | ||
| 120 | J->cur.snap = J->snapbuf; | ||
| 121 | } | ||
| 122 | J->cur.nsnap = (uint16_t)(nsnap+1); | 128 | J->cur.nsnap = (uint16_t)(nsnap+1); |
| 123 | } | 129 | } |
| 124 | J->mergesnap = 0; | 130 | J->mergesnap = 0; |
| @@ -131,14 +137,21 @@ void lj_snap_shrink(jit_State *J) | |||
| 131 | { | 137 | { |
| 132 | BCReg nslots = J->baseslot + J->maxslot; | 138 | BCReg nslots = J->baseslot + J->maxslot; |
| 133 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; | 139 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; |
| 134 | SnapEntry *oflinks = &J->cur.snapmap[snap->mapofs + snap->nslots]; | 140 | SnapEntry *map = &J->cur.snapmap[snap->mapofs]; |
| 135 | SnapEntry *nflinks = &J->cur.snapmap[snap->mapofs + nslots]; | 141 | MSize nent = snap->nent; |
| 136 | uint32_t s, nframelinks = snap->nframelinks; | ||
| 137 | lua_assert(nslots < snap->nslots); | 142 | lua_assert(nslots < snap->nslots); |
| 138 | snap->nslots = (uint8_t)nslots; | 143 | snap->nslots = (uint8_t)nslots; |
| 139 | J->cur.nsnapmap = (uint16_t)(snap->mapofs + nslots + nframelinks); | 144 | if (nent > 0 && snap_slot(map[nent-1]) >= nslots) { |
| 140 | for (s = 0; s < nframelinks; s++) /* Move frame links down. */ | 145 | MSize s, delta, nframelinks = snap->nframelinks; |
| 141 | nflinks[s] = oflinks[s]; | 146 | for (nent--; nent > 0 && snap_slot(map[nent-1]) >= nslots; nent--) |
| 147 | ; | ||
| 148 | delta = snap->nent - nent; | ||
| 149 | snap->nent = (uint8_t)nent; | ||
| 150 | J->cur.nsnapmap = (uint16_t)(snap->mapofs + nent + nframelinks); | ||
| 151 | map += nent; | ||
| 152 | for (s = 0; s < nframelinks; s++) /* Move frame links down. */ | ||
| 153 | map[s] = map[s+delta]; | ||
| 154 | } | ||
| 142 | } | 155 | } |
| 143 | 156 | ||
| 144 | /* -- Snapshot access ----------------------------------------------------- */ | 157 | /* -- Snapshot access ----------------------------------------------------- */ |
| @@ -167,21 +180,24 @@ static RegSP snap_renameref(Trace *T, SnapNo lim, IRRef ref, RegSP rs) | |||
| 167 | return rs; | 180 | return rs; |
| 168 | } | 181 | } |
| 169 | 182 | ||
| 170 | /* Convert a snapshot into a linear slot -> RegSP map. */ | 183 | /* Convert a snapshot into a linear slot -> RegSP map. |
| 184 | ** Note: unused slots are not initialized! | ||
| 185 | */ | ||
| 171 | void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno) | 186 | void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno) |
| 172 | { | 187 | { |
| 173 | SnapShot *snap = &T->snap[snapno]; | 188 | SnapShot *snap = &T->snap[snapno]; |
| 174 | BCReg s, nslots = snap->nslots; | 189 | MSize n, nent = snap->nent; |
| 175 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 190 | SnapEntry *map = &T->snapmap[snap->mapofs]; |
| 176 | BloomFilter rfilt = snap_renamefilter(T, snapno); | 191 | BloomFilter rfilt = snap_renamefilter(T, snapno); |
| 177 | for (s = 0; s < nslots; s++) { | 192 | for (n = 0; n < nent; n++) { |
| 178 | IRRef ref = snap_ref(map[s]); | 193 | SnapEntry sn = map[n]; |
| 194 | IRRef ref = snap_ref(sn); | ||
| 179 | if (!irref_isk(ref)) { | 195 | if (!irref_isk(ref)) { |
| 180 | IRIns *ir = &T->ir[ref]; | 196 | IRIns *ir = &T->ir[ref]; |
| 181 | uint32_t rs = ir->prev; | 197 | uint32_t rs = ir->prev; |
| 182 | if (bloomtest(rfilt, ref)) | 198 | if (bloomtest(rfilt, ref)) |
| 183 | rs = snap_renameref(T, snapno, ref, rs); | 199 | rs = snap_renameref(T, snapno, ref, rs); |
| 184 | rsmap[s] = (uint16_t)rs; | 200 | rsmap[snap_slot(sn)] = (uint16_t)rs; |
| 185 | } | 201 | } |
| 186 | } | 202 | } |
| 187 | } | 203 | } |
| @@ -193,89 +209,88 @@ void lj_snap_restore(jit_State *J, void *exptr) | |||
| 193 | SnapNo snapno = J->exitno; /* For now, snapno == exitno. */ | 209 | SnapNo snapno = J->exitno; /* For now, snapno == exitno. */ |
| 194 | Trace *T = J->trace[J->parent]; | 210 | Trace *T = J->trace[J->parent]; |
| 195 | SnapShot *snap = &T->snap[snapno]; | 211 | SnapShot *snap = &T->snap[snapno]; |
| 196 | BCReg s, nslots = snap->nslots; | 212 | MSize n, nent = snap->nent; |
| 197 | SnapEntry *map = &T->snapmap[snap->mapofs]; | 213 | SnapEntry *map = &T->snapmap[snap->mapofs]; |
| 198 | SnapEntry *flinks = map + nslots + snap->nframelinks; | 214 | SnapEntry *flinks = map + nent + snap->nframelinks; |
| 199 | TValue *o, *newbase, *ntop; | 215 | BCReg nslots = snap->nslots; |
| 216 | TValue *frame; | ||
| 200 | BloomFilter rfilt = snap_renamefilter(T, snapno); | 217 | BloomFilter rfilt = snap_renamefilter(T, snapno); |
| 201 | lua_State *L = J->L; | 218 | lua_State *L = J->L; |
| 202 | 219 | ||
| 203 | /* Make sure the stack is big enough for the slots from the snapshot. */ | 220 | /* Make sure the stack is big enough for the slots from the snapshot. */ |
| 204 | if (L->base + nslots >= L->maxstack) { | 221 | if (LJ_UNLIKELY(L->base + nslots > L->maxstack)) { |
| 205 | L->top = curr_topL(L); | 222 | L->top = curr_topL(L); |
| 206 | lj_state_growstack(L, nslots - curr_proto(L)->framesize); | 223 | lj_state_growstack(L, nslots - curr_proto(L)->framesize); |
| 207 | } | 224 | } |
| 208 | 225 | ||
| 209 | /* Fill stack slots with data from the registers and spill slots. */ | 226 | /* Fill stack slots with data from the registers and spill slots. */ |
| 210 | newbase = NULL; | 227 | frame = L->base-1; |
| 211 | ntop = L->base; | 228 | for (n = 0; n < nent; n++) { |
| 212 | for (s = 0, o = L->base-1; s < nslots; s++, o++) { | 229 | IRRef ref = snap_ref(map[n]); |
| 213 | IRRef ref = snap_ref(map[s]); | 230 | BCReg s = snap_slot(map[n]); |
| 214 | if (ref) { | 231 | TValue *o = &frame[s]; /* Stack slots are relative to start frame. */ |
| 215 | IRIns *ir = &T->ir[ref]; | 232 | IRIns *ir = &T->ir[ref]; |
| 216 | if (irref_isk(ref)) { /* Restore constant slot. */ | 233 | if (irref_isk(ref)) { /* Restore constant slot. */ |
| 217 | lj_ir_kvalue(L, o, ir); | 234 | lj_ir_kvalue(L, o, ir); |
| 218 | } else { | 235 | } else { |
| 219 | IRType1 t = ir->t; | 236 | IRType1 t = ir->t; |
| 220 | RegSP rs = ir->prev; | 237 | RegSP rs = ir->prev; |
| 221 | if (LJ_UNLIKELY(bloomtest(rfilt, ref))) | 238 | if (LJ_UNLIKELY(bloomtest(rfilt, ref))) |
| 222 | rs = snap_renameref(T, snapno, ref, rs); | 239 | rs = snap_renameref(T, snapno, ref, rs); |
| 223 | if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ | 240 | if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ |
| 224 | int32_t *sps = &ex->spill[regsp_spill(rs)]; | 241 | int32_t *sps = &ex->spill[regsp_spill(rs)]; |
| 225 | if (irt_isinteger(t)) { | 242 | if (irt_isinteger(t)) { |
| 226 | setintV(o, *sps); | 243 | setintV(o, *sps); |
| 227 | } else if (irt_isnum(t)) { | 244 | } else if (irt_isnum(t)) { |
| 228 | o->u64 = *(uint64_t *)sps; | 245 | o->u64 = *(uint64_t *)sps; |
| 229 | } else { | 246 | } else { |
| 230 | lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ | 247 | lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ |
| 231 | setgcrefi(o->gcr, *sps); | 248 | setgcrefi(o->gcr, *sps); |
| 232 | setitype(o, irt_toitype(t)); | 249 | setitype(o, irt_toitype(t)); |
| 233 | } | 250 | } |
| 234 | } else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */ | 251 | } else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */ |
| 235 | Reg r = regsp_reg(rs); | 252 | Reg r = regsp_reg(rs); |
| 236 | if (irt_isinteger(t)) { | 253 | if (irt_isinteger(t)) { |
| 237 | setintV(o, ex->gpr[r-RID_MIN_GPR]); | 254 | setintV(o, ex->gpr[r-RID_MIN_GPR]); |
| 238 | } else if (irt_isnum(t)) { | 255 | } else if (irt_isnum(t)) { |
| 239 | setnumV(o, ex->fpr[r-RID_MIN_FPR]); | 256 | setnumV(o, ex->fpr[r-RID_MIN_FPR]); |
| 240 | } else { | 257 | } else { |
| 241 | if (!irt_ispri(t)) | 258 | if (!irt_ispri(t)) |
| 242 | setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); | 259 | setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); |
| 243 | setitype(o, irt_toitype(t)); | 260 | setitype(o, irt_toitype(t)); |
| 244 | } | 261 | } |
| 245 | } else { /* Restore frame slot. */ | 262 | } else { /* Restore frame slot. */ |
| 246 | lua_assert(ir->o == IR_FRAME); | 263 | lua_assert(ir->o == IR_FRAME); |
| 247 | /* This works for both PTR and FUNC IR_FRAME. */ | 264 | /* This works for both PTR and FUNC IR_FRAME. */ |
| 248 | setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void)); | 265 | setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void)); |
| 249 | if (s != 0) /* Do not overwrite link to previous frame. */ | 266 | if (s != 0) /* Do not overwrite link to previous frame. */ |
| 250 | o->fr.tp.ftsz = (int32_t)*--flinks; | 267 | o->fr.tp.ftsz = (int32_t)*--flinks; |
| 251 | if (irt_isfunc(ir->t)) { | 268 | if (irt_isfunc(ir->t)) { |
| 252 | GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr)); | 269 | GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr)); |
| 253 | if (isluafunc(fn)) { | 270 | if (isluafunc(fn)) { |
| 254 | TValue *fs; | 271 | MSize framesize = funcproto(fn)->framesize; |
| 255 | fs = o+1 + funcproto(fn)->framesize; | 272 | TValue *fs; |
| 256 | if (fs > ntop) ntop = fs; /* Update top for newly added frames. */ | 273 | L->base = ++o; |
| 257 | if (s != 0) newbase = o+1; | 274 | if (LJ_UNLIKELY(o + framesize > L->maxstack)) { /* Grow again? */ |
| 275 | ptrdiff_t fsave = savestack(L, frame); | ||
| 276 | L->top = o; | ||
| 277 | lj_state_growstack(L, framesize); | ||
| 278 | frame = restorestack(L, fsave); | ||
| 279 | o = L->top; | ||
| 258 | } | 280 | } |
| 281 | fs = o + framesize; | ||
| 282 | if (s == 0) /* Only partially clear tail call frame at #0. */ | ||
| 283 | o = &frame[nslots]; | ||
| 284 | while (o < fs) /* Clear slots of newly added frames. */ | ||
| 285 | setnilV(o++); | ||
| 259 | } | 286 | } |
| 260 | } | 287 | } |
| 261 | } | 288 | } |
| 262 | } else { | ||
| 263 | lua_assert(!newbase); | ||
| 264 | } | 289 | } |
| 265 | } | 290 | } |
| 266 | if (newbase) L->base = newbase; | ||
| 267 | if (ntop >= L->maxstack) { /* Need to grow the stack again. */ | ||
| 268 | MSize need = (MSize)(ntop - o); | ||
| 269 | L->top = o; | ||
| 270 | lj_state_growstack(L, need); | ||
| 271 | o = L->top; | ||
| 272 | ntop = o + need; | ||
| 273 | } | ||
| 274 | L->top = curr_topL(L); | 291 | L->top = curr_topL(L); |
| 275 | for (; o < ntop; o++) /* Clear remainder of newly added frames. */ | 292 | J->pc = snap_pc(*--flinks); |
| 276 | setnilV(o); | 293 | lua_assert(map + nent == flinks); |
| 277 | lua_assert(map + nslots == flinks-1); | ||
| 278 | J->pc = (const BCIns *)(uintptr_t)(*--flinks); | ||
| 279 | } | 294 | } |
| 280 | 295 | ||
| 281 | #undef IR | 296 | #undef IR |
diff --git a/src/lj_snap.h b/src/lj_snap.h index 776a0bcf..ed7d98a1 100644 --- a/src/lj_snap.h +++ b/src/lj_snap.h | |||
| @@ -14,6 +14,19 @@ LJ_FUNC void lj_snap_add(jit_State *J); | |||
| 14 | LJ_FUNC void lj_snap_shrink(jit_State *J); | 14 | LJ_FUNC void lj_snap_shrink(jit_State *J); |
| 15 | LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno); | 15 | LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno); |
| 16 | LJ_FUNC void lj_snap_restore(jit_State *J, void *exptr); | 16 | LJ_FUNC void lj_snap_restore(jit_State *J, void *exptr); |
| 17 | LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need); | ||
| 18 | LJ_FUNC void lj_snap_grow_map_(jit_State *J, MSize need); | ||
| 19 | |||
| 20 | static LJ_AINLINE void lj_snap_grow_buf(jit_State *J, MSize need) | ||
| 21 | { | ||
| 22 | if (LJ_UNLIKELY(need > J->sizesnap)) lj_snap_grow_buf_(J, need); | ||
| 23 | } | ||
| 24 | |||
| 25 | static LJ_AINLINE void lj_snap_grow_map(jit_State *J, MSize need) | ||
| 26 | { | ||
| 27 | if (LJ_UNLIKELY(need > J->sizesnapmap)) lj_snap_grow_map_(J, need); | ||
| 28 | } | ||
| 29 | |||
| 17 | #endif | 30 | #endif |
| 18 | 31 | ||
| 19 | #endif | 32 | #endif |
diff --git a/src/lj_trace.c b/src/lj_trace.c index a5468655..fb36c7ee 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c | |||
| @@ -161,8 +161,8 @@ void lj_trace_reenableproto(GCproto *pt) | |||
| 161 | static void trace_unpatch(jit_State *J, Trace *T) | 161 | static void trace_unpatch(jit_State *J, Trace *T) |
| 162 | { | 162 | { |
| 163 | BCOp op = bc_op(T->startins); | 163 | BCOp op = bc_op(T->startins); |
| 164 | uint32_t pcofs = T->snap[0].mapofs + T->snap[0].nslots; | 164 | MSize pcofs = T->snap[0].mapofs + T->snap[0].nent; |
| 165 | BCIns *pc = ((BCIns *)(uintptr_t)T->snapmap[pcofs]) - 1; | 165 | BCIns *pc = ((BCIns *)snap_pc(T->snapmap[pcofs])) - 1; |
| 166 | switch (op) { | 166 | switch (op) { |
| 167 | case BC_FORL: | 167 | case BC_FORL: |
| 168 | lua_assert(bc_op(*pc) == BC_JFORI); | 168 | lua_assert(bc_op(*pc) == BC_JFORI); |
| @@ -352,7 +352,6 @@ static void trace_start(jit_State *J) | |||
| 352 | J->cur.ir = J->irbuf; | 352 | J->cur.ir = J->irbuf; |
| 353 | J->cur.snap = J->snapbuf; | 353 | J->cur.snap = J->snapbuf; |
| 354 | J->cur.snapmap = J->snapmapbuf; | 354 | J->cur.snapmap = J->snapmapbuf; |
| 355 | /* J->cur.nsnapmap = 0; */ | ||
| 356 | J->mergesnap = 0; | 355 | J->mergesnap = 0; |
| 357 | J->needsnap = 0; | 356 | J->needsnap = 0; |
| 358 | J->guardemit.irt = 0; | 357 | J->guardemit.irt = 0; |
