about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/Makefile.dep 6
-rw-r--r-- src/lib_jit.c 21
-rw-r--r-- src/lj_asm.c 101
-rw-r--r-- src/lj_gdbjit.c 4
-rw-r--r-- src/lj_jit.h 16
-rw-r--r-- src/lj_opt_dce.c 6
-rw-r--r-- src/lj_opt_loop.c 168
-rw-r--r-- src/lj_record.c 97
-rw-r--r-- src/lj_snap.c 247
-rw-r--r-- src/lj_snap.h 13
-rw-r--r-- src/lj_trace.c 5
11 files changed, 365 insertions, 319 deletions
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 779ee545..ffb7d79b 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -11,7 +11,7 @@ buildvm_lib.o: buildvm_lib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
11buildvm_peobj.o: buildvm_peobj.c buildvm.h lj_def.h lua.h luaconf.h \ 11buildvm_peobj.o: buildvm_peobj.c buildvm.h lj_def.h lua.h luaconf.h \
12 lj_arch.h lj_bc.h 12 lj_arch.h lj_bc.h
13lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ 13lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
14 lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_alloc.h 14 lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_lib.h lj_alloc.h
15lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ 15lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
16 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \ 16 lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \
17 lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_ctype.h lj_lib.h lj_libdef.h 17 lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_ctype.h lj_lib.h lj_libdef.h
@@ -87,8 +87,8 @@ lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
87 lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \ 87 lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \
88 lj_traceerr.h lj_vm.h lj_folddef.h 88 lj_traceerr.h lj_vm.h lj_folddef.h
89lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 89lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
90 lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h \ 90 lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
91 lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h 91 lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h
92lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ 92lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
93 lj_tab.h lj_ir.h lj_jit.h lj_iropt.h 93 lj_tab.h lj_ir.h lj_jit.h lj_iropt.h
94lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ 94lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
diff --git a/src/lib_jit.c b/src/lib_jit.c
index aa421613..f3425d98 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -332,18 +332,25 @@ LJLIB_CF(jit_util_tracesnap)
332 if (T && sn < T->nsnap) { 332 if (T && sn < T->nsnap) {
333 SnapShot *snap = &T->snap[sn]; 333 SnapShot *snap = &T->snap[sn];
334 SnapEntry *map = &T->snapmap[snap->mapofs]; 334 SnapEntry *map = &T->snapmap[snap->mapofs];
335 BCReg s, nslots = snap->nslots; 335 MSize n, nent = snap->nent;
336 BCReg nslots = snap->nslots;
336 GCtab *t; 337 GCtab *t;
337 lua_createtable(L, nslots ? (int)nslots : 1, 0); 338 lua_createtable(L, nslots ? (int)nslots : 1, 0);
338 t = tabV(L->top-1); 339 t = tabV(L->top-1);
339 setintV(lj_tab_setint(L, t, 0), (int32_t)snap->ref - REF_BIAS); 340 setintV(lj_tab_setint(L, t, 0), (int32_t)snap->ref - REF_BIAS);
340 for (s = 0; s < nslots; s++) { 341 /* NYI: get rid of this and expose the compressed slot map. */
341 TValue *o = lj_tab_setint(L, t, (int32_t)(s+1)); 342 {
342 IRRef ref = snap_ref(map[s]); 343 BCReg s;
343 if (ref) 344 for (s = 0; s < nslots; s++) {
344 setintV(o, (int32_t)ref - REF_BIAS); 345 TValue *o = lj_tab_setint(L, t, (int32_t)(s+1));
345 else
346 setboolV(o, 0); 346 setboolV(o, 0);
347 }
348 }
349 for (n = 0; n < nent; n++) {
350 BCReg s = snap_slot(map[n]);
351 IRRef ref = snap_ref(map[n]);
352 TValue *o = lj_tab_setint(L, t, (int32_t)(s+1));
353 setintV(o, (int32_t)ref - REF_BIAS);
347 } 354 }
348 return 1; 355 return 1;
349 } 356 }
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 9f5ce030..b3656e00 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -926,9 +926,9 @@ static void asm_snap_alloc(ASMState *as)
926{ 926{
927 SnapShot *snap = &as->T->snap[as->snapno]; 927 SnapShot *snap = &as->T->snap[as->snapno];
928 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 928 SnapEntry *map = &as->T->snapmap[snap->mapofs];
929 BCReg s, nslots = snap->nslots; 929 MSize n, nent = snap->nent;
930 for (s = 0; s < nslots; s++) { 930 for (n = 0; n < nent; n++) {
931 IRRef ref = snap_ref(map[s]); 931 IRRef ref = snap_ref(map[n]);
932 if (!irref_isk(ref)) { 932 if (!irref_isk(ref)) {
933 IRIns *ir = IR(ref); 933 IRIns *ir = IR(ref);
934 if (!ra_used(ir) && ir->o != IR_FRAME) { 934 if (!ra_used(ir) && ir->o != IR_FRAME) {
@@ -960,9 +960,9 @@ static int asm_snap_checkrename(ASMState *as, IRRef ren)
960{ 960{
961 SnapShot *snap = &as->T->snap[as->snapno]; 961 SnapShot *snap = &as->T->snap[as->snapno];
962 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 962 SnapEntry *map = &as->T->snapmap[snap->mapofs];
963 BCReg s, nslots = snap->nslots; 963 MSize n, nent = snap->nent;
964 for (s = 0; s < nslots; s++) { 964 for (n = 0; n < nent; n++) {
965 IRRef ref = snap_ref(map[s]); 965 IRRef ref = snap_ref(map[n]);
966 if (ref == ren) { 966 if (ref == ren) {
967 IRIns *ir = IR(ref); 967 IRIns *ir = IR(ref);
968 ra_spill(as, ir); /* Register renamed, so force a spill slot. */ 968 ra_spill(as, ir); /* Register renamed, so force a spill slot. */
@@ -2465,18 +2465,17 @@ static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base)
2465 */ 2465 */
2466 RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base); 2466 RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base);
2467 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 2467 SnapEntry *map = &as->T->snapmap[snap->mapofs];
2468 BCReg s, nslots = snap->nslots; 2468 MSize n, nent = snap->nent;
2469 for (s = 0; s < nslots; s++) { 2469 for (n = 0; n < nent; n++) {
2470 IRRef ref = snap_ref(map[s]); 2470 IRRef ref = snap_ref(map[n]);
2471 if (!irref_isk(ref)) { 2471 if (!irref_isk(ref)) {
2472 int32_t ofs = 8*(int32_t)(snap_slot(map[n])-1);
2472 IRIns *ir = IR(ref); 2473 IRIns *ir = IR(ref);
2473 if (ir->o == IR_FRAME) { 2474 if (ir->o == IR_FRAME) {
2474 /* NYI: sync the frame, bump base, set topslot, clear new slots. */ 2475 /* NYI: sync the frame, bump base, set topslot, clear new slots. */
2475 lj_trace_err(as->J, LJ_TRERR_NYIGCF); 2476 lj_trace_err(as->J, LJ_TRERR_NYIGCF);
2476 } else if (irt_isgcv(ir->t) && 2477 } else if (irt_isgcv(ir->t)) {
2477 !(ir->o == IR_SLOAD && ir->op1 < nslots && map[ir->op1] == 0)) {
2478 Reg src = ra_alloc1(as, ref, allow); 2478 Reg src = ra_alloc1(as, ref, allow);
2479 int32_t ofs = 8*(int32_t)(s-1);
2480 emit_movtomro(as, src, base, ofs); 2479 emit_movtomro(as, src, base, ofs);
2481 emit_movmroi(as, base, ofs+4, irt_toitype(ir->t)); 2480 emit_movmroi(as, base, ofs+4, irt_toitype(ir->t));
2482 checkmclim(as); 2481 checkmclim(as);
@@ -2504,7 +2503,7 @@ static void asm_gc_check(ASMState *as, SnapShot *snap)
2504 emit_loadi(as, tmp, (int32_t)as->gcsteps); 2503 emit_loadi(as, tmp, (int32_t)as->gcsteps);
2505 /* We don't know spadj yet, so get the C frame from L->cframe. */ 2504 /* We don't know spadj yet, so get the C frame from L->cframe. */
2506 emit_movmroi(as, tmp, CFRAME_OFS_PC, 2505 emit_movmroi(as, tmp, CFRAME_OFS_PC,
2507 (int32_t)as->T->snapmap[snap->mapofs+snap->nslots]); 2506 (int32_t)as->T->snapmap[snap->mapofs+snap->nent]);
2508 emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK); 2507 emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK);
2509 lstate = IR(ASMREF_L)->r; 2508 lstate = IR(ASMREF_L)->r;
2510 emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe)); 2509 emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe));
@@ -2965,19 +2964,19 @@ static void asm_head_side(ASMState *as)
2965static void asm_tail_sync(ASMState *as) 2964static void asm_tail_sync(ASMState *as)
2966{ 2965{
2967 SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */ 2966 SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */
2968 BCReg s, nslots = snap->nslots; 2967 MSize n, nent = snap->nent;
2969 SnapEntry *map = &as->T->snapmap[snap->mapofs]; 2968 SnapEntry *map = &as->T->snapmap[snap->mapofs];
2970 SnapEntry *flinks = map + nslots + snap->nframelinks; 2969 SnapEntry *flinks = map + nent + snap->nframelinks;
2971 BCReg newbase = 0; 2970 BCReg newbase = 0;
2972 BCReg secondbase = ~(BCReg)0; 2971 BCReg nslots, topslot = 0;
2973 BCReg topslot = 0;
2974 2972
2975 checkmclim(as); 2973 checkmclim(as);
2976 ra_allocref(as, REF_BASE, RID2RSET(RID_BASE)); 2974 ra_allocref(as, REF_BASE, RID2RSET(RID_BASE));
2977 2975
2978 /* Must check all frames to find topslot (outer can be larger than inner). */ 2976 /* Must check all frames to find topslot (outer can be larger than inner). */
2979 for (s = 0; s < nslots; s++) { 2977 for (n = 0; n < nent; n++) {
2980 IRRef ref = snap_ref(map[s]); 2978 IRRef ref = snap_ref(map[n]);
2979 BCReg s = snap_slot(map[n]);
2981 if (!irref_isk(ref)) { 2980 if (!irref_isk(ref)) {
2982 IRIns *ir = IR(ref); 2981 IRIns *ir = IR(ref);
2983 if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { 2982 if (ir->o == IR_FRAME && irt_isfunc(ir->t)) {
@@ -2985,10 +2984,7 @@ static void asm_tail_sync(ASMState *as)
2985 if (isluafunc(fn)) { 2984 if (isluafunc(fn)) {
2986 BCReg fs = s + funcproto(fn)->framesize; 2985 BCReg fs = s + funcproto(fn)->framesize;
2987 if (fs > topslot) topslot = fs; 2986 if (fs > topslot) topslot = fs;
2988 if (s != 0) { 2987 newbase = s;
2989 newbase = s;
2990 if (secondbase == ~(BCReg)0) secondbase = s;
2991 }
2992 } 2988 }
2993 } 2989 }
2994 } 2990 }
@@ -2998,7 +2994,7 @@ static void asm_tail_sync(ASMState *as)
2998 if (as->T->link == TRACE_INTERP) { 2994 if (as->T->link == TRACE_INTERP) {
2999 /* Setup fixed registers for exit to interpreter. */ 2995 /* Setup fixed registers for exit to interpreter. */
3000 emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch); 2996 emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch);
3001 emit_loadi(as, RID_PC, (int32_t)map[nslots]); 2997 emit_loadi(as, RID_PC, (int32_t)map[nent]);
3002 } else if (newbase) { 2998 } else if (newbase) {
3003 /* Save modified BASE for linking to trace with higher start frame. */ 2999 /* Save modified BASE for linking to trace with higher start frame. */
3004 emit_setgl(as, RID_BASE, jit_base); 3000 emit_setgl(as, RID_BASE, jit_base);
@@ -3007,51 +3003,50 @@ static void asm_tail_sync(ASMState *as)
3007 emit_addptr(as, RID_BASE, 8*(int32_t)newbase); 3003 emit_addptr(as, RID_BASE, 8*(int32_t)newbase);
3008 3004
3009 /* Clear stack slots of newly added frames. */ 3005 /* Clear stack slots of newly added frames. */
3006 nslots = snap->nslots;
3010 if (nslots <= topslot) { 3007 if (nslots <= topslot) {
3011 if (nslots < topslot) { 3008 if (nslots < topslot) {
3009 BCReg s;
3012 for (s = nslots; s <= topslot; s++) { 3010 for (s = nslots; s <= topslot; s++) {
3013 emit_movtomro(as, RID_EAX, RID_BASE, 8*(int32_t)s-4); 3011 emit_movtomro(as, RID_EAX, RID_BASE, 8*((int32_t)s-1)+4);
3014 checkmclim(as); 3012 checkmclim(as);
3015 } 3013 }
3016 emit_loadi(as, RID_EAX, LJ_TNIL); 3014 emit_loadi(as, RID_EAX, LJ_TNIL);
3017 } else { 3015 } else {
3018 emit_movmroi(as, RID_BASE, 8*(int32_t)nslots-4, LJ_TNIL); 3016 emit_movmroi(as, RID_BASE, 8*((int32_t)nslots-1)+4, LJ_TNIL);
3019 } 3017 }
3020 } 3018 }
3021 3019
3022 /* Store the value of all modified slots to the Lua stack. */ 3020 /* Store the value of all modified slots to the Lua stack. */
3023 for (s = 0; s < nslots; s++) { 3021 for (n = 0; n < nent; n++) {
3022 BCReg s = snap_slot(map[n]);
3024 int32_t ofs = 8*((int32_t)s-1); 3023 int32_t ofs = 8*((int32_t)s-1);
3025 IRRef ref = snap_ref(map[s]); 3024 IRRef ref = snap_ref(map[n]);
3026 if (ref) { 3025 IRIns *ir = IR(ref);
3027 IRIns *ir = IR(ref); 3026 /* No need to restore readonly slots and unmodified non-parent slots. */
3028 /* No need to restore readonly slots and unmodified non-parent slots. */ 3027 if (ir->o == IR_SLOAD && ir->op1 == s &&
3029 if (ir->o == IR_SLOAD && ir->op1 == s && 3028 (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
3030 (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) 3029 continue;
3031 continue; 3030 if (irt_isnum(ir->t)) {
3032 if (irt_isnum(ir->t)) { 3031 Reg src = ra_alloc1(as, ref, RSET_FPR);
3033 Reg src = ra_alloc1(as, ref, RSET_FPR); 3032 emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
3034 emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); 3033 } else if (ir->o == IR_FRAME) {
3035 } else if (ir->o == IR_FRAME) { 3034 emit_movmroi(as, RID_BASE, ofs, ptr2addr(ir_kgc(IR(ir->op2))));
3036 emit_movmroi(as, RID_BASE, ofs, ptr2addr(ir_kgc(IR(ir->op2)))); 3035 if (s != 0) /* Do not overwrite link to previous frame. */
3037 if (s != 0) /* Do not overwrite link to previous frame. */ 3036 emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks));
3038 emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks));
3039 } else {
3040 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t));
3041 if (!irref_isk(ref)) {
3042 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
3043 emit_movtomro(as, src, RID_BASE, ofs);
3044 } else if (!irt_ispri(ir->t)) {
3045 emit_movmroi(as, RID_BASE, ofs, ir->i);
3046 }
3047 emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
3048 }
3049 } else { 3037 } else {
3050 lua_assert(!(s > secondbase)); 3038 lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t));
3039 if (!irref_isk(ref)) {
3040 Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
3041 emit_movtomro(as, src, RID_BASE, ofs);
3042 } else if (!irt_ispri(ir->t)) {
3043 emit_movmroi(as, RID_BASE, ofs, ir->i);
3044 }
3045 emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
3051 } 3046 }
3052 checkmclim(as); 3047 checkmclim(as);
3053 } 3048 }
3054 lua_assert(map + nslots == flinks-1); 3049 lua_assert(map + nent == flinks-1);
3055} 3050}
3056 3051
3057/* Fixup the tail code. */ 3052/* Fixup the tail code. */
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c
index 4fce5eb9..345afb51 100644
--- a/src/lj_gdbjit.c
+++ b/src/lj_gdbjit.c
@@ -698,8 +698,8 @@ void lj_gdbjit_addtrace(jit_State *J, Trace *T, TraceNo traceno)
698 lua_State *L = J->L; 698 lua_State *L = J->L;
699 GCproto *pt = &gcref(T->startpt)->pt; 699 GCproto *pt = &gcref(T->startpt)->pt;
700 TraceNo parent = T->ir[REF_BASE].op1; 700 TraceNo parent = T->ir[REF_BASE].op1;
701 uintptr_t pcofs = (uintptr_t)(T->snap[0].mapofs+T->snap[0].nslots); 701 uintptr_t pcofs = (uintptr_t)(T->snap[0].mapofs+T->snap[0].nent);
702 const BCIns *startpc = (const BCIns *)(uintptr_t)T->snapmap[pcofs]; 702 const BCIns *startpc = snap_pc(T->snapmap[pcofs]);
703 ctx.T = T; 703 ctx.T = T;
704 ctx.mcaddr = (uintptr_t)T->mcode; 704 ctx.mcaddr = (uintptr_t)T->mcode;
705 ctx.szmcode = T->szmcode; 705 ctx.szmcode = T->szmcode;
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 36e60113..1a1e407a 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -112,17 +112,27 @@ typedef uint8_t MCode;
112typedef struct SnapShot { 112typedef struct SnapShot {
113 uint16_t mapofs; /* Offset into snapshot map. */ 113 uint16_t mapofs; /* Offset into snapshot map. */
114 IRRef1 ref; /* First IR ref for this snapshot. */ 114 IRRef1 ref; /* First IR ref for this snapshot. */
115 uint8_t nslots; /* Number of stack slots. */ 115 uint8_t nslots; /* Number of valid slots. */
116 uint8_t nent; /* Number of compressed entries. */
116 uint8_t nframelinks; /* Number of frame links. */ 117 uint8_t nframelinks; /* Number of frame links. */
117 uint8_t count; /* Count of taken exits for this snapshot. */ 118 uint8_t count; /* Count of taken exits for this snapshot. */
118 uint8_t unused1;
119} SnapShot; 119} SnapShot;
120 120
121#define SNAPCOUNT_DONE 255 /* Already compiled and linked a side trace. */ 121#define SNAPCOUNT_DONE 255 /* Already compiled and linked a side trace. */
122 122
123/* Snapshot entry. */ 123/* Compressed snapshot entry. */
124typedef uint32_t SnapEntry; 124typedef uint32_t SnapEntry;
125
126#define SNAP_FRAME 0x010000 /* Slot has frame link. */
127
128#define SNAP(slot, flags, ref) ((SnapEntry)((slot) << 24) + (flags) + (ref))
129#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc))
130#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz))
125#define snap_ref(sn) ((sn) & 0xffff) 131#define snap_ref(sn) ((sn) & 0xffff)
132#define snap_slot(sn) ((BCReg)((sn) >> 24))
133#define snap_isframe(sn) ((sn) & SNAP_FRAME)
134#define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn))
135#define snap_setref(sn, ref) (((sn) & 0xffff0000) | (ref))
126 136
127/* Snapshot and exit numbers. */ 137/* Snapshot and exit numbers. */
128typedef uint32_t SnapNo; 138typedef uint32_t SnapNo;
diff --git a/src/lj_opt_dce.c b/src/lj_opt_dce.c
index 636d5183..90e81526 100644
--- a/src/lj_opt_dce.c
+++ b/src/lj_opt_dce.c
@@ -24,9 +24,9 @@ static void dce_marksnap(jit_State *J)
24 for (i = 0; i < nsnap; i++) { 24 for (i = 0; i < nsnap; i++) {
25 SnapShot *snap = &J->cur.snap[i]; 25 SnapShot *snap = &J->cur.snap[i];
26 SnapEntry *map = &J->cur.snapmap[snap->mapofs]; 26 SnapEntry *map = &J->cur.snapmap[snap->mapofs];
27 BCReg s, nslots = snap->nslots; 27 MSize n, nent = snap->nent;
28 for (s = 0; s < nslots; s++) { 28 for (n = 0; n < nent; n++) {
29 IRRef ref = snap_ref(map[s]); 29 IRRef ref = snap_ref(map[n]);
30 if (!irref_isk(ref)) 30 if (!irref_isk(ref))
31 irt_setmark(IR(ref)->t); 31 irt_setmark(IR(ref)->t);
32 } 32 }
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
index f2950fe9..e5ad5b43 100644
--- a/src/lj_opt_loop.c
+++ b/src/lj_opt_loop.c
@@ -10,7 +10,6 @@
10 10
11#if LJ_HASJIT 11#if LJ_HASJIT
12 12
13#include "lj_gc.h"
14#include "lj_err.h" 13#include "lj_err.h"
15#include "lj_str.h" 14#include "lj_str.h"
16#include "lj_ir.h" 15#include "lj_ir.h"
@@ -163,21 +162,69 @@ static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi)
163 162
164/* -- Loop unrolling using copy-substitution ------------------------------ */ 163/* -- Loop unrolling using copy-substitution ------------------------------ */
165 164
165/* Copy-substitute snapshot. */
166static void loop_subst_snap(jit_State *J, SnapShot *osnap,
167 SnapEntry *loopmap, IRRef1 *subst)
168{
169 SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs];
170 MSize nmapofs, nframelinks;
171 MSize on, ln, nn, onent = osnap->nent;
172 BCReg nslots = osnap->nslots;
173 SnapShot *snap = &J->cur.snap[J->cur.nsnap];
174 if (irt_isguard(J->guardemit)) { /* Guard inbetween? */
175 nmapofs = J->cur.nsnapmap;
176 J->cur.nsnap++; /* Add new snapshot. */
177 } else { /* Otherwise overwrite previous snapshot. */
178 snap--;
179 nmapofs = snap->mapofs;
180 }
181 J->guardemit.irt = 0;
182 nframelinks = osnap->nframelinks;
183 /* Setup new snapshot. */
184 snap->mapofs = (uint16_t)nmapofs;
185 snap->ref = (IRRef1)J->cur.nins;
186 snap->nframelinks = (uint8_t)nframelinks;
187 snap->nslots = nslots;
188 snap->count = 0;
189 nmap = &J->cur.snapmap[nmapofs];
190 /* Substitute snapshot slots. */
191 on = ln = nn = 0;
192 while (on < onent) {
193 SnapEntry osn = omap[on], lsn = loopmap[ln];
194 if (snap_slot(lsn) < snap_slot(osn)) { /* Copy slot from loop map. */
195 nmap[nn++] = lsn;
196 ln++;
197 } else { /* Copy substituted slot from snapshot map. */
198 if (snap_slot(lsn) == snap_slot(osn)) ln++; /* Shadowed loop slot. */
199 if (!irref_isk(snap_ref(osn)))
200 osn = snap_setref(osn, subst[snap_ref(osn)]);
201 nmap[nn++] = osn;
202 on++;
203 }
204 }
205 while (snap_slot(loopmap[ln]) < nslots) /* Copy remaining loop slots. */
206 nmap[nn++] = loopmap[ln++];
207 snap->nent = (uint8_t)nn;
208 J->cur.nsnapmap = (uint16_t)(nmapofs + nn + nframelinks);
209 omap += onent;
210 nmap += nn;
211 for (nn = 0; nn < nframelinks; nn++) /* Copy frame links. */
212 nmap[nn] = omap[nn];
213}
214
166/* Unroll loop. */ 215/* Unroll loop. */
167static void loop_unroll(jit_State *J) 216static void loop_unroll(jit_State *J)
168{ 217{
169 IRRef1 phi[LJ_MAX_PHI]; 218 IRRef1 phi[LJ_MAX_PHI];
170 uint32_t nphi = 0; 219 uint32_t nphi = 0;
171 IRRef1 *subst; 220 IRRef1 *subst;
172 SnapShot *osnap, *snap; 221 SnapShot *osnap;
173 SnapEntry *loopmap; 222 SnapEntry *loopmap, *psentinel;
174 BCReg loopslots; 223 IRRef ins, invar;
175 MSize nsnap, nsnapmap;
176 IRRef ins, invar, osnapref;
177 224
178 /* Use temp buffer for substitution table. 225 /* Use temp buffer for substitution table.
179 ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. 226 ** Only non-constant refs in [REF_BIAS,invar) are valid indexes.
180 ** Note: don't call into the VM or run the GC or the buffer may be gone. 227 ** Caveat: don't call into the VM or run the GC or the buffer may be gone.
181 */ 228 */
182 invar = J->cur.nins; 229 invar = J->cur.nins;
183 subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, 230 subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf,
@@ -187,80 +234,37 @@ static void loop_unroll(jit_State *J)
187 /* LOOP separates the pre-roll from the loop body. */ 234 /* LOOP separates the pre-roll from the loop body. */
188 emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0); 235 emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0);
189 236
190 /* Ensure size for copy-substituted snapshots (minus #0 and loop snapshot). */ 237 /* Grow snapshot buffer and map for copy-substituted snapshots.
191 nsnap = J->cur.nsnap; 238 ** Need up to twice the number of snapshots minus #0 and loop snapshot.
192 if (LJ_UNLIKELY(2*nsnap-2 > J->sizesnap)) { 239 ** Need up to twice the number of entries plus fallback substitutions
193 MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; 240 ** from the loop snapshot entries for each new snapshot.
194 if (2*nsnap-2 > maxsnap) 241 ** Caveat: both calls may reallocate J->cur.snap and J->cur.snapmap!
195 lj_trace_err(J, LJ_TRERR_SNAPOV); 242 */
196 lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); 243 {
197 J->cur.snap = J->snapbuf; 244 MSize nsnap = J->cur.nsnap;
198 } 245 SnapShot *loopsnap;
199 nsnapmap = J->cur.nsnapmap; /* Use temp. copy to avoid undo. */ 246 lj_snap_grow_buf(J, 2*nsnap-2);
200 if (LJ_UNLIKELY(nsnapmap*2 > J->sizesnapmap)) { 247 lj_snap_grow_map(J, J->cur.nsnapmap*2+(nsnap-2)*J->cur.snap[nsnap-1].nent);
201 J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
202 J->sizesnapmap*sizeof(SnapEntry),
203 2*J->sizesnapmap*sizeof(SnapEntry));
204 J->cur.snapmap = J->snapmapbuf;
205 J->sizesnapmap *= 2;
206 }
207 248
208 /* The loop snapshot is used for fallback substitutions. */ 249 /* The loop snapshot is used for fallback substitutions. */
209 snap = &J->cur.snap[nsnap-1]; 250 loopsnap = &J->cur.snap[nsnap-1];
210 loopmap = &J->cur.snapmap[snap->mapofs]; 251 loopmap = &J->cur.snapmap[loopsnap->mapofs];
211 loopslots = snap->nslots; 252 /* The PC of snapshot #0 and the loop snapshot must match. */
212 /* The PC of snapshot #0 and the loop snapshot must match. */ 253 psentinel = &loopmap[loopsnap->nent];
213 lua_assert(loopmap[loopslots] == J->cur.snapmap[J->cur.snap[0].nslots]); 254 lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]);
255 *psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */
256 }
214 257
215 /* Start substitution with snapshot #1 (#0 is empty for root traces). */ 258 /* Start substitution with snapshot #1 (#0 is empty for root traces). */
216 osnap = &J->cur.snap[1]; 259 osnap = &J->cur.snap[1];
217 osnapref = osnap->ref;
218 260
219 /* Copy and substitute all recorded instructions and snapshots. */ 261 /* Copy and substitute all recorded instructions and snapshots. */
220 for (ins = REF_FIRST; ins < invar; ins++) { 262 for (ins = REF_FIRST; ins < invar; ins++) {
221 IRIns *ir; 263 IRIns *ir;
222 IRRef op1, op2; 264 IRRef op1, op2;
223 265
224 /* Copy-substitute snapshot. */ 266 if (ins >= osnap->ref) /* Instruction belongs to next snapshot? */
225 if (ins >= osnapref) { 267 loop_subst_snap(J, osnap++, loopmap, subst); /* Copy-substitute it. */
226 SnapEntry *nmap, *omap = &J->cur.snapmap[osnap->mapofs];
227 BCReg s, nslots;
228 uint32_t nmapofs, nframelinks;
229 if (irt_isguard(J->guardemit)) { /* Guard inbetween? */
230 nmapofs = nsnapmap;
231 snap++; /* Add new snapshot. */
232 } else {
233 nmapofs = snap->mapofs; /* Overwrite previous snapshot. */
234 }
235 J->guardemit.irt = 0;
236 nslots = osnap->nslots;
237 nframelinks = osnap->nframelinks;
238 snap->mapofs = (uint16_t)nmapofs;
239 snap->ref = (IRRef1)J->cur.nins;
240 snap->nslots = (uint8_t)nslots;
241 snap->nframelinks = (uint8_t)nframelinks;
242 snap->count = 0;
243 osnap++;
244 osnapref = osnap->ref;
245 nsnapmap = nmapofs + nslots + nframelinks;
246 nmap = &J->cur.snapmap[nmapofs];
247 /* Substitute snapshot slots. */
248 for (s = 0; s < nslots; s++) {
249 IRRef ref = snap_ref(omap[s]);
250 if (ref) {
251 if (!irref_isk(ref))
252 ref = subst[ref];
253 } else if (s < loopslots) {
254 ref = loopmap[s];
255 }
256 nmap[s] = ref;
257 }
258 /* Copy frame links. */
259 nmap += nslots;
260 omap += nslots;
261 for (s = 0; s < nframelinks; s++)
262 nmap[s] = omap[s];
263 }
264 268
265 /* Substitute instruction operands. */ 269 /* Substitute instruction operands. */
266 ir = IR(ins); 270 ir = IR(ins);
@@ -295,22 +299,24 @@ static void loop_unroll(jit_State *J)
295 } 299 }
296 } 300 }
297 } 301 }
298 if (irt_isguard(J->guardemit)) { /* Guard inbetween? */ 302 if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */
299 J->cur.nsnapmap = (uint16_t)nsnapmap; 303 J->cur.nsnapmap = (uint16_t)J->cur.snap[--J->cur.nsnap].mapofs;
300 snap++;
301 } else {
302 J->cur.nsnapmap = (uint16_t)snap->mapofs; /* Last snapshot is redundant. */
303 }
304 J->cur.nsnap = (uint16_t)(snap - J->cur.snap);
305 lua_assert(J->cur.nsnapmap <= J->sizesnapmap); 304 lua_assert(J->cur.nsnapmap <= J->sizesnapmap);
305 *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */
306 306
307 loop_emit_phi(J, subst, phi, nphi); 307 loop_emit_phi(J, subst, phi, nphi);
308} 308}
309 309
310/* Undo any partial changes made by the loop optimization. */ 310/* Undo any partial changes made by the loop optimization. */
311static void loop_undo(jit_State *J, IRRef ins) 311static void loop_undo(jit_State *J, IRRef ins, MSize nsnap)
312{ 312{
313 ptrdiff_t i; 313 ptrdiff_t i;
314 SnapShot *snap = &J->cur.snap[nsnap-1];
315 SnapEntry *map = J->cur.snapmap;
316 map[snap->mapofs + snap->nent] = map[J->cur.snap[0].nent]; /* Restore PC. */
317 J->cur.nsnapmap = (uint16_t)(snap->mapofs + snap->nent + snap->nframelinks);
318 J->cur.nsnap = nsnap;
319 J->guardemit.irt = 0;
314 lj_ir_rollback(J, ins); 320 lj_ir_rollback(J, ins);
315 for (i = 0; i < BPROP_SLOTS; i++) { /* Remove backprop. cache entries. */ 321 for (i = 0; i < BPROP_SLOTS; i++) { /* Remove backprop. cache entries. */
316 BPropEntry *bp = &J->bpropcache[i]; 322 BPropEntry *bp = &J->bpropcache[i];
@@ -336,6 +342,7 @@ static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud)
336int lj_opt_loop(jit_State *J) 342int lj_opt_loop(jit_State *J)
337{ 343{
338 IRRef nins = J->cur.nins; 344 IRRef nins = J->cur.nins;
345 MSize nsnap = J->cur.nsnap;
339 int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); 346 int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt);
340 if (LJ_UNLIKELY(errcode)) { 347 if (LJ_UNLIKELY(errcode)) {
341 lua_State *L = J->L; 348 lua_State *L = J->L;
@@ -348,8 +355,7 @@ int lj_opt_loop(jit_State *J)
348 if (--J->instunroll < 0) /* But do not unroll forever. */ 355 if (--J->instunroll < 0) /* But do not unroll forever. */
349 break; 356 break;
350 L->top--; /* Remove error object. */ 357 L->top--; /* Remove error object. */
351 J->guardemit.irt = 0; 358 loop_undo(J, nins, nsnap);
352 loop_undo(J, nins);
353 return 1; /* Loop optimization failed, continue recording. */ 359 return 1; /* Loop optimization failed, continue recording. */
354 default: 360 default:
355 break; 361 break;
diff --git a/src/lj_record.c b/src/lj_record.c
index 6af25ccb..3f442088 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -1696,7 +1696,7 @@ static void optstate_comp(jit_State *J, int cond)
1696 const BCIns *npc = J->pc + 2 + (cond ? bc_j(jmpins) : 0); 1696 const BCIns *npc = J->pc + 2 + (cond ? bc_j(jmpins) : 0);
1697 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; 1697 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
1698 /* Avoid re-recording the comparison in side traces. */ 1698 /* Avoid re-recording the comparison in side traces. */
1699 J->cur.snapmap[snap->mapofs + snap->nslots] = u32ptr(npc); 1699 J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc);
1700 J->needsnap = 1; 1700 J->needsnap = 1;
1701 /* Shrink last snapshot if possible. */ 1701 /* Shrink last snapshot if possible. */
1702 if (bc_a(jmpins) < J->maxslot) { 1702 if (bc_a(jmpins) < J->maxslot) {
@@ -2159,61 +2159,62 @@ static void rec_setup_side(jit_State *J, Trace *T)
2159{ 2159{
2160 SnapShot *snap = &T->snap[J->exitno]; 2160 SnapShot *snap = &T->snap[J->exitno];
2161 SnapEntry *map = &T->snapmap[snap->mapofs]; 2161 SnapEntry *map = &T->snapmap[snap->mapofs];
2162 BCReg s, nslots = snap->nslots; 2162 MSize n, nent = snap->nent;
2163 BloomFilter seen = 0; 2163 BloomFilter seen = 0;
2164 for (s = 0; s < nslots; s++) { 2164 /* Emit IR for slots inherited from parent snapshot. */
2165 IRRef ref = snap_ref(map[s]); 2165 for (n = 0; n < nent; n++) {
2166 if (ref) { 2166 IRRef ref = snap_ref(map[n]);
2167 IRIns *ir = &T->ir[ref]; 2167 BCReg s = snap_slot(map[n]);
2168 TRef tr = 0; 2168 IRIns *ir = &T->ir[ref];
2169 /* The bloom filter avoids O(nslots^2) overhead for de-duping slots. */ 2169 TRef tr;
2170 if (bloomtest(seen, ref)) { 2170 /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
2171 BCReg j; 2171 if (bloomtest(seen, ref)) {
2172 for (j = 0; j < s; j++) 2172 MSize j;
2173 if (snap_ref(map[j]) == ref) { 2173 for (j = 0; j < n; j++)
2174 if (ir->o == IR_FRAME && irt_isfunc(ir->t)) { 2174 if (snap_ref(map[j]) == ref) {
2175 lua_assert(s != 0); 2175 tr = J->slot[snap_slot(map[j])];
2176 J->baseslot = s+1; 2176 if (ir->o == IR_FRAME && irt_isfunc(ir->t)) {
2177 J->framedepth++; 2177 lua_assert(s != 0);
2178 }
2179 tr = J->slot[j];
2180 goto dupslot;
2181 }
2182 }
2183 bloomset(seen, ref);
2184 switch ((IROp)ir->o) {
2185 case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break;
2186 case IR_KINT: tr = lj_ir_kint(J, ir->i); break;
2187 case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break;
2188 case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break;
2189 case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */
2190 if (irt_isfunc(ir->t)) {
2191 if (s != 0) {
2192 J->baseslot = s+1; 2178 J->baseslot = s+1;
2193 J->framedepth++; 2179 J->framedepth++;
2194 } 2180 }
2195 tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2])); 2181 goto dupslot;
2196 tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr);
2197 } else {
2198 tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void));
2199 tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr);
2200 } 2182 }
2201 break; 2183 }
2202 case IR_SLOAD: /* Inherited SLOADs don't need a guard or type check. */ 2184 bloomset(seen, ref);
2203 tr = emitir_raw(ir->ot & ~IRT_GUARD, s, 2185 switch ((IROp)ir->o) {
2204 (ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT); 2186 /* Only have to deal with constants that can occur in stack slots. */
2205 break; 2187 case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break;
2206 default: /* Parent refs are already typed and don't need a guard. */ 2188 case IR_KINT: tr = lj_ir_kint(J, ir->i); break;
2207 tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s, 2189 case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break;
2208 IRSLOAD_INHERIT|IRSLOAD_PARENT); 2190 case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break;
2209 break; 2191 case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */
2192 if (irt_isfunc(ir->t)) {
2193 if (s != 0) {
2194 J->baseslot = s+1;
2195 J->framedepth++;
2196 }
2197 tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2]));
2198 tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr);
2199 } else {
2200 tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void));
2201 tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr);
2210 } 2202 }
2211 dupslot: 2203 break;
2212 J->slot[s] = tr; 2204 case IR_SLOAD: /* Inherited SLOADs don't need a guard or type check. */
2205 tr = emitir_raw(ir->ot & ~IRT_GUARD, s,
2206 (ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT);
2207 break;
2208 default: /* Parent refs are already typed and don't need a guard. */
2209 tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s,
2210 IRSLOAD_INHERIT|IRSLOAD_PARENT);
2211 break;
2213 } 2212 }
2213 dupslot:
2214 J->slot[s] = tr;
2214 } 2215 }
2215 J->base = J->slot + J->baseslot; 2216 J->base = J->slot + J->baseslot;
2216 J->maxslot = nslots - J->baseslot; 2217 J->maxslot = snap->nslots - J->baseslot;
2217 lj_snap_add(J); 2218 lj_snap_add(J);
2218} 2219}
2219 2220
@@ -2259,7 +2260,7 @@ void lj_record_setup(jit_State *J)
2259 J->cur.root = (uint16_t)root; 2260 J->cur.root = (uint16_t)root;
2260 J->cur.startins = BCINS_AD(BC_JMP, 0, 0); 2261 J->cur.startins = BCINS_AD(BC_JMP, 0, 0);
2261 /* Check whether we could at least potentially form an extra loop. */ 2262 /* Check whether we could at least potentially form an extra loop. */
2262 if (J->exitno == 0 && T->snap[0].nslots == 1 && T->snapmap[0] == 0) { 2263 if (J->exitno == 0 && T->snap[0].nent == 0) {
2263 /* We can narrow a FORL for some side traces, too. */ 2264 /* We can narrow a FORL for some side traces, too. */
2264 if (J->pc > J->pt->bc && bc_op(J->pc[-1]) == BC_JFORI && 2265 if (J->pc > J->pt->bc && bc_op(J->pc[-1]) == BC_JFORI &&
2265 bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) { 2266 bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) {
diff --git a/src/lj_snap.c b/src/lj_snap.c
index f262e1c9..d22c90a4 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -23,28 +23,50 @@
23/* Some local macros to save typing. Undef'd at the end. */ 23/* Some local macros to save typing. Undef'd at the end. */
24#define IR(ref) (&J->cur.ir[(ref)]) 24#define IR(ref) (&J->cur.ir[(ref)])
25 25
26/* -- Snapshot buffer allocation ------------------------------------------ */
27
28/* Grow snapshot buffer. */
29void lj_snap_grow_buf_(jit_State *J, MSize need)
30{
31 MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
32 if (need > maxsnap)
33 lj_trace_err(J, LJ_TRERR_SNAPOV);
34 lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
35 J->cur.snap = J->snapbuf;
36}
37
38/* Grow snapshot map buffer. */
39void lj_snap_grow_map_(jit_State *J, MSize need)
40{
41 if (need < 2*J->sizesnapmap)
42 need = 2*J->sizesnapmap;
43 else if (need < 64)
44 need = 64;
45 J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
46 J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
47 J->cur.snapmap = J->snapmapbuf;
48 J->sizesnapmap = need;
49}
50
26/* -- Snapshot generation ------------------------------------------------- */ 51/* -- Snapshot generation ------------------------------------------------- */
27 52
28/* NYI: Snapshots are in need of a redesign. The current storage model for 53/* NYI: IR_FRAME should be eliminated, too. */
29** snapshot maps is too wasteful. They could be compressed (1D or 2D) and
30** made more flexible at the same time. Iterators should no longer need to
31** skip unmodified slots. IR_FRAME should be eliminated, too.
32*/
33 54
34/* Add all modified slots to the snapshot. */ 55/* Add all modified slots to the snapshot. */
35static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) 56static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
36{ 57{
37 BCReg s; 58 BCReg s;
59 MSize n = 0;
38 for (s = 0; s < nslots; s++) { 60 for (s = 0; s < nslots; s++) {
39 IRRef ref = tref_ref(J->slot[s]); 61 IRRef ref = tref_ref(J->slot[s]);
40 if (ref) { 62 if (ref) {
41 IRIns *ir = IR(ref); 63 IRIns *ir = IR(ref);
42 if (ir->o == IR_SLOAD && ir->op1 == s && !(ir->op2 & IRSLOAD_INHERIT)) 64 if (!(ir->o == IR_SLOAD && ir->op1 == s &&
43 ref = 0; 65 !(ir->op2 & IRSLOAD_INHERIT)))
66 map[n++] = SNAP(s, ir->o == IR_FRAME ? SNAP_FRAME : 0, ref);
44 } 67 }
45 map[s] = (SnapEntry)ref;
46 } 68 }
47 return nslots; 69 return n;
48} 70}
49 71
50/* Add frame links at the end of the snapshot. */ 72/* Add frame links at the end of the snapshot. */
@@ -53,17 +75,17 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map)
53 cTValue *frame = J->L->base - 1; 75 cTValue *frame = J->L->base - 1;
54 cTValue *lim = J->L->base - J->baseslot; 76 cTValue *lim = J->L->base - J->baseslot;
55 MSize f = 0; 77 MSize f = 0;
56 map[f++] = u32ptr(J->pc); 78 map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
57 while (frame > lim) { 79 while (frame > lim) { /* Backwards traversal of all frames above base. */
58 if (frame_islua(frame)) { 80 if (frame_islua(frame)) {
59 map[f++] = u32ptr(frame_pc(frame)); 81 map[f++] = SNAP_MKPC(frame_pc(frame));
60 frame = frame_prevl(frame); 82 frame = frame_prevl(frame);
61 } else if (frame_ispcall(frame)) { 83 } else if (frame_ispcall(frame)) {
62 map[f++] = (uint32_t)frame_ftsz(frame); 84 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
63 frame = frame_prevd(frame); 85 frame = frame_prevd(frame);
64 } else if (frame_iscont(frame)) { 86 } else if (frame_iscont(frame)) {
65 map[f++] = (uint32_t)frame_ftsz(frame); 87 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
66 map[f++] = u32ptr(frame_contpc(frame)); 88 map[f++] = SNAP_MKPC(frame_contpc(frame));
67 frame = frame_prevd(frame); 89 frame = frame_prevd(frame);
68 } else { 90 } else {
69 lua_assert(0); 91 lua_assert(0);
@@ -76,28 +98,19 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map)
76static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) 98static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
77{ 99{
78 BCReg nslots = J->baseslot + J->maxslot; 100 BCReg nslots = J->baseslot + J->maxslot;
79 MSize nsm, nframelinks; 101 MSize nent, nframelinks;
80 SnapEntry *p; 102 SnapEntry *p;
81 /* Conservative estimate. Continuation frames need 2 slots. */ 103 /* Conservative estimate. Continuation frames need 2 slots. */
82 nsm = nsnapmap + nslots + (uint32_t)J->framedepth*2+1; 104 lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth*2+1);
83 if (LJ_UNLIKELY(nsm > J->sizesnapmap)) { /* Need to grow snapshot map? */
84 if (nsm < 2*J->sizesnapmap)
85 nsm = 2*J->sizesnapmap;
86 else if (nsm < 64)
87 nsm = 64;
88 J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
89 J->sizesnapmap*sizeof(SnapEntry), nsm*sizeof(SnapEntry));
90 J->cur.snapmap = J->snapmapbuf;
91 J->sizesnapmap = nsm;
92 }
93 p = &J->cur.snapmap[nsnapmap]; 105 p = &J->cur.snapmap[nsnapmap];
94 nslots = snapshot_slots(J, p, nslots); 106 nent = snapshot_slots(J, p, nslots);
95 nframelinks = snapshot_framelinks(J, p + nslots); 107 nframelinks = snapshot_framelinks(J, p + nent);
96 J->cur.nsnapmap = (uint16_t)(nsnapmap + nslots + nframelinks); 108 J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + nframelinks);
97 snap->mapofs = (uint16_t)nsnapmap; 109 snap->mapofs = (uint16_t)nsnapmap;
98 snap->ref = (IRRef1)J->cur.nins; 110 snap->ref = (IRRef1)J->cur.nins;
99 snap->nslots = (uint8_t)nslots; 111 snap->nent = (uint8_t)nent;
100 snap->nframelinks = (uint8_t)nframelinks; 112 snap->nframelinks = (uint8_t)nframelinks;
113 snap->nslots = (uint8_t)nslots;
101 snap->count = 0; 114 snap->count = 0;
102} 115}
103 116
@@ -111,14 +124,7 @@ void lj_snap_add(jit_State *J)
111 (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { 124 (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) {
112 nsnapmap = J->cur.snap[--nsnap].mapofs; 125 nsnapmap = J->cur.snap[--nsnap].mapofs;
113 } else { 126 } else {
114 /* Need to grow snapshot buffer? */ 127 lj_snap_grow_buf(J, nsnap+1);
115 if (LJ_UNLIKELY(nsnap >= J->sizesnap)) {
116 MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
117 if (nsnap >= maxsnap)
118 lj_trace_err(J, LJ_TRERR_SNAPOV);
119 lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
120 J->cur.snap = J->snapbuf;
121 }
122 J->cur.nsnap = (uint16_t)(nsnap+1); 128 J->cur.nsnap = (uint16_t)(nsnap+1);
123 } 129 }
124 J->mergesnap = 0; 130 J->mergesnap = 0;
@@ -131,14 +137,21 @@ void lj_snap_shrink(jit_State *J)
131{ 137{
132 BCReg nslots = J->baseslot + J->maxslot; 138 BCReg nslots = J->baseslot + J->maxslot;
133 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; 139 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
134 SnapEntry *oflinks = &J->cur.snapmap[snap->mapofs + snap->nslots]; 140 SnapEntry *map = &J->cur.snapmap[snap->mapofs];
135 SnapEntry *nflinks = &J->cur.snapmap[snap->mapofs + nslots]; 141 MSize nent = snap->nent;
136 uint32_t s, nframelinks = snap->nframelinks;
137 lua_assert(nslots < snap->nslots); 142 lua_assert(nslots < snap->nslots);
138 snap->nslots = (uint8_t)nslots; 143 snap->nslots = (uint8_t)nslots;
139 J->cur.nsnapmap = (uint16_t)(snap->mapofs + nslots + nframelinks); 144 if (nent > 0 && snap_slot(map[nent-1]) >= nslots) {
140 for (s = 0; s < nframelinks; s++) /* Move frame links down. */ 145 MSize s, delta, nframelinks = snap->nframelinks;
141 nflinks[s] = oflinks[s]; 146 for (nent--; nent > 0 && snap_slot(map[nent-1]) >= nslots; nent--)
147 ;
148 delta = snap->nent - nent;
149 snap->nent = (uint8_t)nent;
150 J->cur.nsnapmap = (uint16_t)(snap->mapofs + nent + nframelinks);
151 map += nent;
152 for (s = 0; s < nframelinks; s++) /* Move frame links down. */
153 map[s] = map[s+delta];
154 }
142} 155}
143 156
144/* -- Snapshot access ----------------------------------------------------- */ 157/* -- Snapshot access ----------------------------------------------------- */
@@ -167,21 +180,24 @@ static RegSP snap_renameref(Trace *T, SnapNo lim, IRRef ref, RegSP rs)
167 return rs; 180 return rs;
168} 181}
169 182
170/* Convert a snapshot into a linear slot -> RegSP map. */ 183/* Convert a snapshot into a linear slot -> RegSP map.
184** Note: unused slots are not initialized!
185*/
171void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno) 186void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno)
172{ 187{
173 SnapShot *snap = &T->snap[snapno]; 188 SnapShot *snap = &T->snap[snapno];
174 BCReg s, nslots = snap->nslots; 189 MSize n, nent = snap->nent;
175 SnapEntry *map = &T->snapmap[snap->mapofs]; 190 SnapEntry *map = &T->snapmap[snap->mapofs];
176 BloomFilter rfilt = snap_renamefilter(T, snapno); 191 BloomFilter rfilt = snap_renamefilter(T, snapno);
177 for (s = 0; s < nslots; s++) { 192 for (n = 0; n < nent; n++) {
178 IRRef ref = snap_ref(map[s]); 193 SnapEntry sn = map[n];
194 IRRef ref = snap_ref(sn);
179 if (!irref_isk(ref)) { 195 if (!irref_isk(ref)) {
180 IRIns *ir = &T->ir[ref]; 196 IRIns *ir = &T->ir[ref];
181 uint32_t rs = ir->prev; 197 uint32_t rs = ir->prev;
182 if (bloomtest(rfilt, ref)) 198 if (bloomtest(rfilt, ref))
183 rs = snap_renameref(T, snapno, ref, rs); 199 rs = snap_renameref(T, snapno, ref, rs);
184 rsmap[s] = (uint16_t)rs; 200 rsmap[snap_slot(sn)] = (uint16_t)rs;
185 } 201 }
186 } 202 }
187} 203}
@@ -193,89 +209,88 @@ void lj_snap_restore(jit_State *J, void *exptr)
193 SnapNo snapno = J->exitno; /* For now, snapno == exitno. */ 209 SnapNo snapno = J->exitno; /* For now, snapno == exitno. */
194 Trace *T = J->trace[J->parent]; 210 Trace *T = J->trace[J->parent];
195 SnapShot *snap = &T->snap[snapno]; 211 SnapShot *snap = &T->snap[snapno];
196 BCReg s, nslots = snap->nslots; 212 MSize n, nent = snap->nent;
197 SnapEntry *map = &T->snapmap[snap->mapofs]; 213 SnapEntry *map = &T->snapmap[snap->mapofs];
198 SnapEntry *flinks = map + nslots + snap->nframelinks; 214 SnapEntry *flinks = map + nent + snap->nframelinks;
199 TValue *o, *newbase, *ntop; 215 BCReg nslots = snap->nslots;
216 TValue *frame;
200 BloomFilter rfilt = snap_renamefilter(T, snapno); 217 BloomFilter rfilt = snap_renamefilter(T, snapno);
201 lua_State *L = J->L; 218 lua_State *L = J->L;
202 219
203 /* Make sure the stack is big enough for the slots from the snapshot. */ 220 /* Make sure the stack is big enough for the slots from the snapshot. */
204 if (L->base + nslots >= L->maxstack) { 221 if (LJ_UNLIKELY(L->base + nslots > L->maxstack)) {
205 L->top = curr_topL(L); 222 L->top = curr_topL(L);
206 lj_state_growstack(L, nslots - curr_proto(L)->framesize); 223 lj_state_growstack(L, nslots - curr_proto(L)->framesize);
207 } 224 }
208 225
209 /* Fill stack slots with data from the registers and spill slots. */ 226 /* Fill stack slots with data from the registers and spill slots. */
210 newbase = NULL; 227 frame = L->base-1;
211 ntop = L->base; 228 for (n = 0; n < nent; n++) {
212 for (s = 0, o = L->base-1; s < nslots; s++, o++) { 229 IRRef ref = snap_ref(map[n]);
213 IRRef ref = snap_ref(map[s]); 230 BCReg s = snap_slot(map[n]);
214 if (ref) { 231 TValue *o = &frame[s]; /* Stack slots are relative to start frame. */
215 IRIns *ir = &T->ir[ref]; 232 IRIns *ir = &T->ir[ref];
216 if (irref_isk(ref)) { /* Restore constant slot. */ 233 if (irref_isk(ref)) { /* Restore constant slot. */
217 lj_ir_kvalue(L, o, ir); 234 lj_ir_kvalue(L, o, ir);
218 } else { 235 } else {
219 IRType1 t = ir->t; 236 IRType1 t = ir->t;
220 RegSP rs = ir->prev; 237 RegSP rs = ir->prev;
221 if (LJ_UNLIKELY(bloomtest(rfilt, ref))) 238 if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
222 rs = snap_renameref(T, snapno, ref, rs); 239 rs = snap_renameref(T, snapno, ref, rs);
223 if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ 240 if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
224 int32_t *sps = &ex->spill[regsp_spill(rs)]; 241 int32_t *sps = &ex->spill[regsp_spill(rs)];
225 if (irt_isinteger(t)) { 242 if (irt_isinteger(t)) {
226 setintV(o, *sps); 243 setintV(o, *sps);
227 } else if (irt_isnum(t)) { 244 } else if (irt_isnum(t)) {
228 o->u64 = *(uint64_t *)sps; 245 o->u64 = *(uint64_t *)sps;
229 } else { 246 } else {
230 lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ 247 lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
231 setgcrefi(o->gcr, *sps); 248 setgcrefi(o->gcr, *sps);
232 setitype(o, irt_toitype(t)); 249 setitype(o, irt_toitype(t));
233 } 250 }
234 } else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */ 251 } else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */
235 Reg r = regsp_reg(rs); 252 Reg r = regsp_reg(rs);
236 if (irt_isinteger(t)) { 253 if (irt_isinteger(t)) {
237 setintV(o, ex->gpr[r-RID_MIN_GPR]); 254 setintV(o, ex->gpr[r-RID_MIN_GPR]);
238 } else if (irt_isnum(t)) { 255 } else if (irt_isnum(t)) {
239 setnumV(o, ex->fpr[r-RID_MIN_FPR]); 256 setnumV(o, ex->fpr[r-RID_MIN_FPR]);
240 } else { 257 } else {
241 if (!irt_ispri(t)) 258 if (!irt_ispri(t))
242 setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); 259 setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
243 setitype(o, irt_toitype(t)); 260 setitype(o, irt_toitype(t));
244 } 261 }
245 } else { /* Restore frame slot. */ 262 } else { /* Restore frame slot. */
246 lua_assert(ir->o == IR_FRAME); 263 lua_assert(ir->o == IR_FRAME);
247 /* This works for both PTR and FUNC IR_FRAME. */ 264 /* This works for both PTR and FUNC IR_FRAME. */
248 setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void)); 265 setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void));
249 if (s != 0) /* Do not overwrite link to previous frame. */ 266 if (s != 0) /* Do not overwrite link to previous frame. */
250 o->fr.tp.ftsz = (int32_t)*--flinks; 267 o->fr.tp.ftsz = (int32_t)*--flinks;
251 if (irt_isfunc(ir->t)) { 268 if (irt_isfunc(ir->t)) {
252 GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr)); 269 GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr));
253 if (isluafunc(fn)) { 270 if (isluafunc(fn)) {
254 TValue *fs; 271 MSize framesize = funcproto(fn)->framesize;
255 fs = o+1 + funcproto(fn)->framesize; 272 TValue *fs;
256 if (fs > ntop) ntop = fs; /* Update top for newly added frames. */ 273 L->base = ++o;
257 if (s != 0) newbase = o+1; 274 if (LJ_UNLIKELY(o + framesize > L->maxstack)) { /* Grow again? */
275 ptrdiff_t fsave = savestack(L, frame);
276 L->top = o;
277 lj_state_growstack(L, framesize);
278 frame = restorestack(L, fsave);
279 o = L->top;
258 } 280 }
281 fs = o + framesize;
282 if (s == 0) /* Only partially clear tail call frame at #0. */
283 o = &frame[nslots];
284 while (o < fs) /* Clear slots of newly added frames. */
285 setnilV(o++);
259 } 286 }
260 } 287 }
261 } 288 }
262 } else {
263 lua_assert(!newbase);
264 } 289 }
265 } 290 }
266 if (newbase) L->base = newbase;
267 if (ntop >= L->maxstack) { /* Need to grow the stack again. */
268 MSize need = (MSize)(ntop - o);
269 L->top = o;
270 lj_state_growstack(L, need);
271 o = L->top;
272 ntop = o + need;
273 }
274 L->top = curr_topL(L); 291 L->top = curr_topL(L);
275 for (; o < ntop; o++) /* Clear remainder of newly added frames. */ 292 J->pc = snap_pc(*--flinks);
276 setnilV(o); 293 lua_assert(map + nent == flinks);
277 lua_assert(map + nslots == flinks-1);
278 J->pc = (const BCIns *)(uintptr_t)(*--flinks);
279} 294}
280 295
281#undef IR 296#undef IR
diff --git a/src/lj_snap.h b/src/lj_snap.h
index 776a0bcf..ed7d98a1 100644
--- a/src/lj_snap.h
+++ b/src/lj_snap.h
@@ -14,6 +14,19 @@ LJ_FUNC void lj_snap_add(jit_State *J);
14LJ_FUNC void lj_snap_shrink(jit_State *J); 14LJ_FUNC void lj_snap_shrink(jit_State *J);
15LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno); 15LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno);
16LJ_FUNC void lj_snap_restore(jit_State *J, void *exptr); 16LJ_FUNC void lj_snap_restore(jit_State *J, void *exptr);
17LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need);
18LJ_FUNC void lj_snap_grow_map_(jit_State *J, MSize need);
19
20static LJ_AINLINE void lj_snap_grow_buf(jit_State *J, MSize need)
21{
22 if (LJ_UNLIKELY(need > J->sizesnap)) lj_snap_grow_buf_(J, need);
23}
24
25static LJ_AINLINE void lj_snap_grow_map(jit_State *J, MSize need)
26{
27 if (LJ_UNLIKELY(need > J->sizesnapmap)) lj_snap_grow_map_(J, need);
28}
29
17#endif 30#endif
18 31
19#endif 32#endif
diff --git a/src/lj_trace.c b/src/lj_trace.c
index a5468655..fb36c7ee 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -161,8 +161,8 @@ void lj_trace_reenableproto(GCproto *pt)
161static void trace_unpatch(jit_State *J, Trace *T) 161static void trace_unpatch(jit_State *J, Trace *T)
162{ 162{
163 BCOp op = bc_op(T->startins); 163 BCOp op = bc_op(T->startins);
164 uint32_t pcofs = T->snap[0].mapofs + T->snap[0].nslots; 164 MSize pcofs = T->snap[0].mapofs + T->snap[0].nent;
165 BCIns *pc = ((BCIns *)(uintptr_t)T->snapmap[pcofs]) - 1; 165 BCIns *pc = ((BCIns *)snap_pc(T->snapmap[pcofs])) - 1;
166 switch (op) { 166 switch (op) {
167 case BC_FORL: 167 case BC_FORL:
168 lua_assert(bc_op(*pc) == BC_JFORI); 168 lua_assert(bc_op(*pc) == BC_JFORI);
@@ -352,7 +352,6 @@ static void trace_start(jit_State *J)
352 J->cur.ir = J->irbuf; 352 J->cur.ir = J->irbuf;
353 J->cur.snap = J->snapbuf; 353 J->cur.snap = J->snapbuf;
354 J->cur.snapmap = J->snapmapbuf; 354 J->cur.snapmap = J->snapmapbuf;
355 /* J->cur.nsnapmap = 0; */
356 J->mergesnap = 0; 355 J->mergesnap = 0;
357 J->needsnap = 0; 356 J->needsnap = 0;
358 J->guardemit.irt = 0; 357 J->guardemit.irt = 0;